300 lines, 8.7 KiB, Diff
From f9072731bedac6f6373dd75798b5a801ce614c02 Mon Sep 17 00:00:00 2001
|
|
From: Arianna Avanzini <avanzini.arianna@gmail.com>
|
|
Date: Mon, 19 Dec 2011 16:33:41 +0100
|
|
Subject: [PATCH 1/3] block: prepare I/O context code for BFQ-v5 for 3.2
|
|
|
|
BFQ uses struct cfq_io_context to store its per-process per-device data,
|
|
reusing the same code for cic handling of CFQ. The code is not shared
at the moment, to minimize the impact of these patches.
|
|
|
|
This patch introduces a new hlist to each io_context to store all the
|
|
cic's allocated by BFQ to allow calling the right destructor on module
|
|
unload; the radix tree used for cic lookup needs to be duplicated
|
|
because it can contain dead keys inserted by a scheduler and later
|
|
retrieved by the other one.
|
|
|
|
Update the io_context exit and free paths to take care also of
|
|
the BFQ cic's.
|
|
|
|
Change the type of cfqq inside struct cfq_io_context to void *
|
|
to use it also for BFQ per-queue data.
|
|
|
|
A new bfq-specific ioprio_changed field is necessary, too, to avoid
|
|
clobbering cfq's one, so switch ioprio_changed to a bitmap, with one
|
|
element per scheduler.
|
|
|
|
Signed-off-by: Fabio Checconi <fabio@gandalf.sssup.it>
|
|
Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
|
|
Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
|
|
---
|
|
block/Kconfig.iosched | 26 ++++++++++++++++++++++++++
|
|
block/blk-ioc.c | 30 +++++++++++++++++-------------
|
|
block/cfq-iosched.c | 10 +++++++---
|
|
fs/ioprio.c | 9 +++++++--
|
|
include/linux/iocontext.h | 18 +++++++++++++++---
|
|
5 files changed, 72 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
|
|
index 3199b76..5905452 100644
|
|
--- a/block/Kconfig.iosched
|
|
+++ b/block/Kconfig.iosched
|
|
@@ -43,6 +43,28 @@ config CFQ_GROUP_IOSCHED
|
|
---help---
|
|
Enable group IO scheduling in CFQ.
|
|
|
|
+config IOSCHED_BFQ
|
|
+ tristate "BFQ I/O scheduler"
|
|
+ depends on EXPERIMENTAL
|
|
+ default n
|
|
+ ---help---
|
|
+ The BFQ I/O scheduler tries to distribute bandwidth among
|
|
+ all processes according to their weights.
|
|
+ It aims at distributing the bandwidth as desired, independently of
|
|
+ the disk parameters and with any workload. It also tries to
|
|
+ guarantee low latency to interactive and soft real-time
|
|
+ applications. If compiled built-in (saying Y here), BFQ can
|
|
+ be configured to support hierarchical scheduling.
|
|
+
|
|
+config CGROUP_BFQIO
|
|
+ bool "BFQ hierarchical scheduling support"
|
|
+ depends on CGROUPS && IOSCHED_BFQ=y
|
|
+ default n
|
|
+ ---help---
|
|
+ Enable hierarchical scheduling in BFQ, using the cgroups
|
|
+ filesystem interface. The name of the subsystem will be
|
|
+ bfqio.
|
|
+
|
|
choice
|
|
prompt "Default I/O scheduler"
|
|
default DEFAULT_CFQ
|
|
@@ -56,6 +78,9 @@ choice
|
|
config DEFAULT_CFQ
|
|
bool "CFQ" if IOSCHED_CFQ=y
|
|
|
|
+ config DEFAULT_BFQ
|
|
+ bool "BFQ" if IOSCHED_BFQ=y
|
|
+
|
|
config DEFAULT_NOOP
|
|
bool "No-op"
|
|
|
|
@@ -65,6 +90,7 @@ config DEFAULT_IOSCHED
|
|
string
|
|
default "deadline" if DEFAULT_DEADLINE
|
|
default "cfq" if DEFAULT_CFQ
|
|
+ default "bfq" if DEFAULT_BFQ
|
|
default "noop" if DEFAULT_NOOP
|
|
|
|
endmenu
|
|
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
|
|
index 6f9bbd9..d0d16d4 100644
|
|
--- a/block/blk-ioc.c
|
|
+++ b/block/blk-ioc.c
|
|
@@ -5,6 +5,7 @@
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/bio.h>
|
|
+#include <linux/bitmap.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
|
|
#include <linux/slab.h>
|
|
@@ -16,13 +17,12 @@
|
|
*/
|
|
static struct kmem_cache *iocontext_cachep;
|
|
|
|
-static void cfq_dtor(struct io_context *ioc)
|
|
+static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list)
|
|
{
|
|
- if (!hlist_empty(&ioc->cic_list)) {
|
|
+ if (!hlist_empty(list)) {
|
|
struct cfq_io_context *cic;
|
|
|
|
- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
|
|
- cic_list);
|
|
+ cic = hlist_entry(list->first, struct cfq_io_context, cic_list);
|
|
cic->dtor(ioc);
|
|
}
|
|
}
|
|
@@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc)
|
|
|
|
if (atomic_long_dec_and_test(&ioc->refcount)) {
|
|
rcu_read_lock();
|
|
- cfq_dtor(ioc);
|
|
+
|
|
+ hlist_sched_dtor(ioc, &ioc->cic_list);
|
|
+ hlist_sched_dtor(ioc, &ioc->bfq_cic_list);
|
|
rcu_read_unlock();
|
|
|
|
kmem_cache_free(iocontext_cachep, ioc);
|
|
@@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc)
|
|
}
|
|
EXPORT_SYMBOL(put_io_context);
|
|
|
|
-static void cfq_exit(struct io_context *ioc)
|
|
+static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list)
|
|
{
|
|
rcu_read_lock();
|
|
|
|
- if (!hlist_empty(&ioc->cic_list)) {
|
|
+ if (!hlist_empty(list)) {
|
|
struct cfq_io_context *cic;
|
|
|
|
- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
|
|
- cic_list);
|
|
+ cic = hlist_entry(list->first, struct cfq_io_context, cic_list);
|
|
cic->exit(ioc);
|
|
}
|
|
rcu_read_unlock();
|
|
@@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task)
|
|
task->io_context = NULL;
|
|
task_unlock(task);
|
|
|
|
- if (atomic_dec_and_test(&ioc->nr_tasks))
|
|
- cfq_exit(ioc);
|
|
-
|
|
+ if (atomic_dec_and_test(&ioc->nr_tasks)) {
|
|
+ hlist_sched_exit(ioc, &ioc->cic_list);
|
|
+ hlist_sched_exit(ioc, &ioc->bfq_cic_list);
|
|
+ }
|
|
put_io_context(ioc);
|
|
}
|
|
|
|
@@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
|
|
atomic_long_set(&ioc->refcount, 1);
|
|
atomic_set(&ioc->nr_tasks, 1);
|
|
spin_lock_init(&ioc->lock);
|
|
- ioc->ioprio_changed = 0;
|
|
+ bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
|
|
ioc->ioprio = 0;
|
|
ioc->last_waited = 0; /* doesn't matter... */
|
|
ioc->nr_batch_requests = 0; /* because this is 0 */
|
|
INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
|
|
INIT_HLIST_HEAD(&ioc->cic_list);
|
|
+ INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH);
|
|
+ INIT_HLIST_HEAD(&ioc->bfq_cic_list);
|
|
ioc->ioc_data = NULL;
|
|
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
|
|
ioc->cgroup_changed = 0;
|
|
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
|
|
index 3548705..a120a31 100644
|
|
--- a/block/cfq-iosched.c
|
|
+++ b/block/cfq-iosched.c
|
|
@@ -2946,7 +2946,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
|
|
static void cfq_ioc_set_ioprio(struct io_context *ioc)
|
|
{
|
|
call_for_each_cic(ioc, changed_ioprio);
|
|
- ioc->ioprio_changed = 0;
|
|
}
|
|
|
|
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
|
|
@@ -3238,8 +3237,13 @@ retry:
|
|
goto err_free;
|
|
|
|
out:
|
|
- smp_read_barrier_depends();
|
|
- if (unlikely(ioc->ioprio_changed))
|
|
+ /*
|
|
+ * test_and_clear_bit() implies a memory barrier, paired with
|
|
+ * the wmb() in fs/ioprio.c, so the value seen for ioprio is the
|
|
+ * new one.
|
|
+ */
|
|
+ if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
|
|
+ ioc->ioprio_changed)))
|
|
cfq_ioc_set_ioprio(ioc);
|
|
|
|
#ifdef CONFIG_CFQ_GROUP_IOSCHED
|
|
diff --git a/fs/ioprio.c b/fs/ioprio.c
|
|
index f79dab8..6b0cb885 100644
|
|
--- a/fs/ioprio.c
|
|
+++ b/fs/ioprio.c
|
|
@@ -31,7 +31,7 @@
|
|
|
|
int set_task_ioprio(struct task_struct *task, int ioprio)
|
|
{
|
|
- int err;
|
|
+ int err, i;
|
|
struct io_context *ioc;
|
|
const struct cred *cred = current_cred(), *tcred;
|
|
|
|
@@ -61,12 +61,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
|
|
err = -ENOMEM;
|
|
break;
|
|
}
|
|
+ /* let other ioc users see the new values */
|
|
+ smp_wmb();
|
|
task->io_context = ioc;
|
|
} while (1);
|
|
|
|
if (!err) {
|
|
ioc->ioprio = ioprio;
|
|
- ioc->ioprio_changed = 1;
|
|
+ /* make sure schedulers see the new ioprio value */
|
|
+ wmb();
|
|
+ for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
|
|
+ set_bit(i, ioc->ioprio_changed);
|
|
}
|
|
|
|
task_unlock(task);
|
|
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
|
|
index 5037a0a..69fdd58 100644
|
|
--- a/include/linux/iocontext.h
|
|
+++ b/include/linux/iocontext.h
|
|
@@ -1,10 +1,10 @@
|
|
#ifndef IOCONTEXT_H
|
|
#define IOCONTEXT_H
|
|
|
|
+#include <linux/bitmap.h>
|
|
#include <linux/radix-tree.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
-struct cfq_queue;
|
|
struct cfq_ttime {
|
|
unsigned long last_end_request;
|
|
|
|
@@ -16,7 +16,7 @@ struct cfq_ttime {
|
|
struct cfq_io_context {
|
|
void *key;
|
|
|
|
- struct cfq_queue *cfqq[2];
|
|
+ void *cfqq[2];
|
|
|
|
struct io_context *ioc;
|
|
|
|
@@ -32,6 +32,16 @@ struct cfq_io_context {
|
|
};
|
|
|
|
/*
|
|
+ * Indexes into the ioprio_changed bitmap. A bit set indicates that
|
|
+ * the corresponding I/O scheduler needs to see a ioprio update.
|
|
+ */
|
|
+enum {
|
|
+ IOC_CFQ_IOPRIO_CHANGED,
|
|
+ IOC_BFQ_IOPRIO_CHANGED,
|
|
+ IOC_IOPRIO_CHANGED_BITS
|
|
+};
|
|
+
|
|
+/*
|
|
* I/O subsystem state of the associated processes. It is refcounted
|
|
* and kmalloc'ed. These could be shared between processes.
|
|
*/
|
|
@@ -43,7 +53,7 @@ struct io_context {
|
|
spinlock_t lock;
|
|
|
|
unsigned short ioprio;
|
|
- unsigned short ioprio_changed;
|
|
+ DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
|
|
|
|
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
|
|
unsigned short cgroup_changed;
|
|
@@ -57,6 +67,8 @@ struct io_context {
|
|
|
|
struct radix_tree_root radix_root;
|
|
struct hlist_head cic_list;
|
|
+ struct radix_tree_root bfq_radix_root;
|
|
+ struct hlist_head bfq_cic_list;
|
|
void __rcu *ioc_data;
|
|
};
|
|
|
|
--
|
|
1.7.10.4
|
|
|