|
|
|
@@ -818,7 +818,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
|
|
|
|
|
|
|
|
|
|
for_each_subsys(cgrp->root, ss)
|
|
|
|
|
if (ss->pre_destroy) {
|
|
|
|
|
ret = ss->pre_destroy(ss, cgrp);
|
|
|
|
|
ret = ss->pre_destroy(cgrp);
|
|
|
|
|
if (ret)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
@@ -846,7 +846,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
|
|
|
|
|
* Release the subsystem state objects.
|
|
|
|
|
*/
|
|
|
|
|
for_each_subsys(cgrp->root, ss)
|
|
|
|
|
ss->destroy(ss, cgrp);
|
|
|
|
|
ss->destroy(cgrp);
|
|
|
|
|
|
|
|
|
|
cgrp->root->number_of_cgroups--;
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
@@ -1015,7 +1015,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
|
|
list_move(&ss->sibling, &root->subsys_list);
|
|
|
|
|
ss->root = root;
|
|
|
|
|
if (ss->bind)
|
|
|
|
|
ss->bind(ss, cgrp);
|
|
|
|
|
ss->bind(cgrp);
|
|
|
|
|
mutex_unlock(&ss->hierarchy_mutex);
|
|
|
|
|
/* refcount was already taken, and we're keeping it */
|
|
|
|
|
} else if (bit & removed_bits) {
|
|
|
|
@@ -1025,7 +1025,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
|
|
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
|
|
|
|
|
mutex_lock(&ss->hierarchy_mutex);
|
|
|
|
|
if (ss->bind)
|
|
|
|
|
ss->bind(ss, dummytop);
|
|
|
|
|
ss->bind(dummytop);
|
|
|
|
|
dummytop->subsys[i]->cgroup = dummytop;
|
|
|
|
|
cgrp->subsys[i] = NULL;
|
|
|
|
|
subsys[i]->root = &rootnode;
|
|
|
|
@@ -1763,6 +1763,7 @@ EXPORT_SYMBOL_GPL(cgroup_path);
|
|
|
|
|
/* One record per task handled by a cgroup attach operation. */
struct task_and_cgroup {
|
|
|
|
|
/* the task being migrated */
struct task_struct *task;
|
|
|
|
|
/* cgroup the task was in when recorded; passed as the old cgroup on migration */
struct cgroup *cgrp;
|
|
|
|
|
/* css_set obtained from find_css_set() that the task is switched to */
struct css_set *cg;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct cgroup_taskset {
|
|
|
|
@@ -1843,11 +1844,10 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
|
|
|
|
|
* will already exist. If not set, this function might sleep, and can fail with
|
|
|
|
|
* -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
|
|
|
|
|
*/
|
|
|
|
|
static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
|
|
|
struct task_struct *tsk, bool guarantee)
|
|
|
|
|
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
|
|
|
struct task_struct *tsk, struct css_set *newcg)
|
|
|
|
|
{
|
|
|
|
|
struct css_set *oldcg;
|
|
|
|
|
struct css_set *newcg;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We are synchronized through threadgroup_lock() against PF_EXITING
|
|
|
|
@@ -1857,23 +1857,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
|
|
|
WARN_ON_ONCE(tsk->flags & PF_EXITING);
|
|
|
|
|
oldcg = tsk->cgroups;
|
|
|
|
|
|
|
|
|
|
/* locate or allocate a new css_set for this task. */
|
|
|
|
|
if (guarantee) {
|
|
|
|
|
/* we know the css_set we want already exists. */
|
|
|
|
|
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
|
|
|
|
|
read_lock(&css_set_lock);
|
|
|
|
|
newcg = find_existing_css_set(oldcg, cgrp, template);
|
|
|
|
|
BUG_ON(!newcg);
|
|
|
|
|
get_css_set(newcg);
|
|
|
|
|
read_unlock(&css_set_lock);
|
|
|
|
|
} else {
|
|
|
|
|
might_sleep();
|
|
|
|
|
/* find_css_set will give us newcg already referenced. */
|
|
|
|
|
newcg = find_css_set(oldcg, cgrp);
|
|
|
|
|
if (!newcg)
|
|
|
|
|
return -ENOMEM;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
task_lock(tsk);
|
|
|
|
|
rcu_assign_pointer(tsk->cgroups, newcg);
|
|
|
|
|
task_unlock(tsk);
|
|
|
|
@@ -1892,7 +1875,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
|
|
|
|
|
put_css_set(oldcg);
|
|
|
|
|
|
|
|
|
|
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@@ -1910,6 +1892,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
|
|
|
struct cgroup *oldcgrp;
|
|
|
|
|
struct cgroupfs_root *root = cgrp->root;
|
|
|
|
|
struct cgroup_taskset tset = { };
|
|
|
|
|
struct css_set *newcg;
|
|
|
|
|
|
|
|
|
|
/* @tsk either already exited or can't exit until the end */
|
|
|
|
|
if (tsk->flags & PF_EXITING)
|
|
|
|
@@ -1925,7 +1908,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
|
|
|
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (ss->can_attach) {
|
|
|
|
|
retval = ss->can_attach(ss, cgrp, &tset);
|
|
|
|
|
retval = ss->can_attach(cgrp, &tset);
|
|
|
|
|
if (retval) {
|
|
|
|
|
/*
|
|
|
|
|
* Remember on which subsystem the can_attach()
|
|
|
|
@@ -1939,13 +1922,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
|
|
|
|
|
if (retval)
|
|
|
|
|
newcg = find_css_set(tsk->cgroups, cgrp);
|
|
|
|
|
if (!newcg) {
|
|
|
|
|
retval = -ENOMEM;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
|
|
|
|
|
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (ss->attach)
|
|
|
|
|
ss->attach(ss, cgrp, &tset);
|
|
|
|
|
ss->attach(cgrp, &tset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
synchronize_rcu();
|
|
|
|
@@ -1967,7 +1954,7 @@ out:
|
|
|
|
|
*/
|
|
|
|
|
break;
|
|
|
|
|
if (ss->cancel_attach)
|
|
|
|
|
ss->cancel_attach(ss, cgrp, &tset);
|
|
|
|
|
ss->cancel_attach(cgrp, &tset);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return retval;
|
|
|
|
@@ -1997,66 +1984,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
|
|
|
|
|
}
|
|
|
|
|
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* cgroup_attach_proc works in two stages, the first of which prefetches all
|
|
|
|
|
* new css_sets needed (to make sure we have enough memory before committing
|
|
|
|
|
* to the move) and stores them in a list of entries of the following type.
|
|
|
|
|
* TODO: possible optimization: use css_set->rcu_head for chaining instead
|
|
|
|
|
*/
|
|
|
|
|
struct cg_list_entry {
|
|
|
|
|
/* prefetched css_set; released with put_css_set() when the list is torn down */
struct css_set *cg;
|
|
|
|
|
/* node linking this entry into the caller's list of prefetched css_sets */
struct list_head links;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
 * css_set_check_fetched - is the css_set for (@cg, @cgrp) already on the list?
 * @cgrp: cgroup passed to find_existing_css_set()
 * @tsk: not referenced anywhere in this function body
 * @cg: css_set the lookup starts from
 * @newcg_list: list of struct cg_list_entry to search
 *
 * Returns true only when find_existing_css_set() finds a css_set and an
 * entry on @newcg_list already points at it. Returns false when no such
 * css_set exists yet, or when it exists but is not on the list.
 */
static bool css_set_check_fetched(struct cgroup *cgrp,
|
|
|
|
|
struct task_struct *tsk, struct css_set *cg,
|
|
|
|
|
struct list_head *newcg_list)
|
|
|
|
|
{
|
|
|
|
|
struct css_set *newcg;
|
|
|
|
|
struct cg_list_entry *cg_entry;
|
|
|
|
|
/* scratch array handed to find_existing_css_set(); not read afterwards */
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
|
|
|
|
|
|
|
|
|
|
|
/* lookup only: no reference is taken on newcg, it is just compared by pointer */
read_lock(&css_set_lock);
|
|
|
|
|
newcg = find_existing_css_set(cg, cgrp, template);
|
|
|
|
|
read_unlock(&css_set_lock);
|
|
|
|
|
|
|
|
|
|
|
/* doesn't exist at all? */
|
|
|
|
|
if (!newcg)
|
|
|
|
|
return false;
|
|
|
|
|
/* see if it's already in the list */
|
|
|
|
|
list_for_each_entry(cg_entry, newcg_list, links)
|
|
|
|
|
if (cg_entry->cg == newcg)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
|
/* not found */
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Find the new css_set and store it in the list in preparation for moving the
|
|
|
|
|
* given task to the given cgroup. Returns 0 or -ENOMEM.
|
|
|
|
|
*/
|
|
|
|
|
/*
 * @cgrp: cgroup passed to find_css_set()
 * @cg: css_set passed to find_css_set() as the starting point
 * @newcg_list: list head that receives the new struct cg_list_entry
 *
 * On success the css_set returned by find_css_set() stays referenced by the
 * new list entry. On allocation failure of the entry it is dropped again
 * with put_css_set() before returning -ENOMEM.
 */
static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
|
|
|
|
|
struct list_head *newcg_list)
|
|
|
|
|
{
|
|
|
|
|
struct css_set *newcg;
|
|
|
|
|
struct cg_list_entry *cg_entry;
|
|
|
|
|
|
|
|
|
|
|
/* ensure a new css_set will exist for this thread */
|
|
|
|
|
newcg = find_css_set(cg, cgrp);
|
|
|
|
|
if (!newcg)
|
|
|
|
|
return -ENOMEM;
|
|
|
|
|
/* add it to the list */
|
|
|
|
|
cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
|
|
|
|
|
if (!cg_entry) {
|
|
|
|
|
/* undo find_css_set(): nothing else will hold on to newcg */
put_css_set(newcg);
|
|
|
|
|
return -ENOMEM;
|
|
|
|
|
}
|
|
|
|
|
cg_entry->cg = newcg;
|
|
|
|
|
/* list_add() inserts at the head of newcg_list */
list_add(&cg_entry->links, newcg_list);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
|
|
|
|
|
* @cgrp: the cgroup to attach to
|
|
|
|
@@ -2070,20 +1997,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
int retval, i, group_size;
|
|
|
|
|
struct cgroup_subsys *ss, *failed_ss = NULL;
|
|
|
|
|
/* guaranteed to be initialized later, but the compiler needs this */
|
|
|
|
|
struct css_set *oldcg;
|
|
|
|
|
struct cgroupfs_root *root = cgrp->root;
|
|
|
|
|
/* threadgroup list cursor and array */
|
|
|
|
|
struct task_struct *tsk;
|
|
|
|
|
struct task_and_cgroup *tc;
|
|
|
|
|
struct flex_array *group;
|
|
|
|
|
struct cgroup_taskset tset = { };
|
|
|
|
|
/*
|
|
|
|
|
* we need to make sure we have css_sets for all the tasks we're
|
|
|
|
|
* going to move -before- we actually start moving them, so that in
|
|
|
|
|
* case we get an ENOMEM we can bail out before making any changes.
|
|
|
|
|
*/
|
|
|
|
|
struct list_head newcg_list;
|
|
|
|
|
struct cg_list_entry *cg_entry, *temp_nobe;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* step 0: in order to do expensive, possibly blocking operations for
|
|
|
|
@@ -2102,23 +2021,14 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
if (retval)
|
|
|
|
|
goto out_free_group_list;
|
|
|
|
|
|
|
|
|
|
/* prevent changes to the threadgroup list while we take a snapshot. */
|
|
|
|
|
read_lock(&tasklist_lock);
|
|
|
|
|
if (!thread_group_leader(leader)) {
|
|
|
|
|
/*
|
|
|
|
|
* a race with de_thread from another thread's exec() may strip
|
|
|
|
|
* us of our leadership, making while_each_thread unsafe to use
|
|
|
|
|
* on this task. if this happens, there is no choice but to
|
|
|
|
|
* throw this task away and try again (from cgroup_procs_write);
|
|
|
|
|
* this is "double-double-toil-and-trouble-check locking".
|
|
|
|
|
*/
|
|
|
|
|
read_unlock(&tasklist_lock);
|
|
|
|
|
retval = -EAGAIN;
|
|
|
|
|
goto out_free_group_list;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tsk = leader;
|
|
|
|
|
i = 0;
|
|
|
|
|
/*
|
|
|
|
|
* Prevent freeing of tasks while we take a snapshot. Tasks that are
|
|
|
|
|
* already PF_EXITING could be freed from underneath us unless we
|
|
|
|
|
* take an rcu_read_lock.
|
|
|
|
|
*/
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
do {
|
|
|
|
|
struct task_and_cgroup ent;
|
|
|
|
|
|
|
|
|
@@ -2128,24 +2038,24 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
|
|
|
|
|
/* as per above, nr_threads may decrease, but not increase. */
|
|
|
|
|
BUG_ON(i >= group_size);
|
|
|
|
|
/*
|
|
|
|
|
* saying GFP_ATOMIC has no effect here because we did prealloc
|
|
|
|
|
* earlier, but it's good form to communicate our expectations.
|
|
|
|
|
*/
|
|
|
|
|
ent.task = tsk;
|
|
|
|
|
ent.cgrp = task_cgroup_from_root(tsk, root);
|
|
|
|
|
/* nothing to do if this task is already in the cgroup */
|
|
|
|
|
if (ent.cgrp == cgrp)
|
|
|
|
|
continue;
|
|
|
|
|
/*
|
|
|
|
|
* saying GFP_ATOMIC has no effect here because we did prealloc
|
|
|
|
|
* earlier, but it's good form to communicate our expectations.
|
|
|
|
|
*/
|
|
|
|
|
retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
|
|
|
|
|
BUG_ON(retval != 0);
|
|
|
|
|
i++;
|
|
|
|
|
} while_each_thread(leader, tsk);
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
/* remember the number of threads in the array for later. */
|
|
|
|
|
group_size = i;
|
|
|
|
|
tset.tc_array = group;
|
|
|
|
|
tset.tc_array_len = group_size;
|
|
|
|
|
read_unlock(&tasklist_lock);
|
|
|
|
|
|
|
|
|
|
/* methods shouldn't be called if no task is actually migrating */
|
|
|
|
|
retval = 0;
|
|
|
|
@@ -2157,7 +2067,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
*/
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (ss->can_attach) {
|
|
|
|
|
retval = ss->can_attach(ss, cgrp, &tset);
|
|
|
|
|
retval = ss->can_attach(cgrp, &tset);
|
|
|
|
|
if (retval) {
|
|
|
|
|
failed_ss = ss;
|
|
|
|
|
goto out_cancel_attach;
|
|
|
|
@@ -2169,17 +2079,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
* step 2: make sure css_sets exist for all threads to be migrated.
|
|
|
|
|
* we use find_css_set, which allocates a new one if necessary.
|
|
|
|
|
*/
|
|
|
|
|
INIT_LIST_HEAD(&newcg_list);
|
|
|
|
|
for (i = 0; i < group_size; i++) {
|
|
|
|
|
tc = flex_array_get(group, i);
|
|
|
|
|
oldcg = tc->task->cgroups;
|
|
|
|
|
|
|
|
|
|
/* if we don't already have it in the list get a new one */
|
|
|
|
|
if (!css_set_check_fetched(cgrp, tc->task, oldcg,
|
|
|
|
|
&newcg_list)) {
|
|
|
|
|
retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
|
|
|
|
|
if (retval)
|
|
|
|
|
goto out_list_teardown;
|
|
|
|
|
tc->cg = find_css_set(tc->task->cgroups, cgrp);
|
|
|
|
|
if (!tc->cg) {
|
|
|
|
|
retval = -ENOMEM;
|
|
|
|
|
goto out_put_css_set_refs;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -2190,8 +2095,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < group_size; i++) {
|
|
|
|
|
tc = flex_array_get(group, i);
|
|
|
|
|
retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
|
|
|
|
|
BUG_ON(retval);
|
|
|
|
|
cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
|
|
|
|
|
}
|
|
|
|
|
/* nothing is sensitive to fork() after this point. */
|
|
|
|
|
|
|
|
|
@@ -2200,7 +2104,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
*/
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (ss->attach)
|
|
|
|
|
ss->attach(ss, cgrp, &tset);
|
|
|
|
|
ss->attach(cgrp, &tset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -2209,21 +2113,22 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
|
|
|
|
|
synchronize_rcu();
|
|
|
|
|
cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
|
|
retval = 0;
|
|
|
|
|
out_list_teardown:
|
|
|
|
|
/* clean up the list of prefetched css_sets. */
|
|
|
|
|
list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
|
|
|
|
|
list_del(&cg_entry->links);
|
|
|
|
|
put_css_set(cg_entry->cg);
|
|
|
|
|
kfree(cg_entry);
|
|
|
|
|
out_put_css_set_refs:
|
|
|
|
|
if (retval) {
|
|
|
|
|
for (i = 0; i < group_size; i++) {
|
|
|
|
|
tc = flex_array_get(group, i);
|
|
|
|
|
if (!tc->cg)
|
|
|
|
|
break;
|
|
|
|
|
put_css_set(tc->cg);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out_cancel_attach:
|
|
|
|
|
/* same deal as in cgroup_attach_task */
|
|
|
|
|
if (retval) {
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (ss == failed_ss)
|
|
|
|
|
break;
|
|
|
|
|
if (ss->cancel_attach)
|
|
|
|
|
ss->cancel_attach(ss, cgrp, &tset);
|
|
|
|
|
ss->cancel_attach(cgrp, &tset);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out_free_group_list:
|
|
|
|
@@ -2245,22 +2150,14 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
|
|
|
|
|
if (!cgroup_lock_live_group(cgrp))
|
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
|
|
retry_find_task:
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
if (pid) {
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
tsk = find_task_by_vpid(pid);
|
|
|
|
|
if (!tsk) {
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
cgroup_unlock();
|
|
|
|
|
return -ESRCH;
|
|
|
|
|
}
|
|
|
|
|
if (threadgroup) {
|
|
|
|
|
/*
|
|
|
|
|
* RCU protects this access, since tsk was found in the
|
|
|
|
|
* tid map. a race with de_thread may cause group_leader
|
|
|
|
|
* to stop being the leader, but cgroup_attach_proc will
|
|
|
|
|
* detect it later.
|
|
|
|
|
*/
|
|
|
|
|
tsk = tsk->group_leader;
|
|
|
|
|
ret= -ESRCH;
|
|
|
|
|
goto out_unlock_cgroup;
|
|
|
|
|
}
|
|
|
|
|
/*
|
|
|
|
|
* even if we're attaching all tasks in the thread group, we
|
|
|
|
@@ -2271,29 +2168,38 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
|
|
|
|
|
cred->euid != tcred->uid &&
|
|
|
|
|
cred->euid != tcred->suid) {
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
cgroup_unlock();
|
|
|
|
|
return -EACCES;
|
|
|
|
|
ret = -EACCES;
|
|
|
|
|
goto out_unlock_cgroup;
|
|
|
|
|
}
|
|
|
|
|
get_task_struct(tsk);
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
} else {
|
|
|
|
|
if (threadgroup)
|
|
|
|
|
tsk = current->group_leader;
|
|
|
|
|
else
|
|
|
|
|
tsk = current;
|
|
|
|
|
get_task_struct(tsk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
threadgroup_lock(tsk);
|
|
|
|
|
} else
|
|
|
|
|
tsk = current;
|
|
|
|
|
|
|
|
|
|
if (threadgroup)
|
|
|
|
|
ret = cgroup_attach_proc(cgrp, tsk);
|
|
|
|
|
else
|
|
|
|
|
ret = cgroup_attach_task(cgrp, tsk);
|
|
|
|
|
tsk = tsk->group_leader;
|
|
|
|
|
get_task_struct(tsk);
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
|
|
threadgroup_lock(tsk);
|
|
|
|
|
if (threadgroup) {
|
|
|
|
|
if (!thread_group_leader(tsk)) {
|
|
|
|
|
/*
|
|
|
|
|
* a race with de_thread from another thread's exec()
|
|
|
|
|
* may strip us of our leadership, if this happens,
|
|
|
|
|
* there is no choice but to throw this task away and
|
|
|
|
|
* try again; this is
|
|
|
|
|
* "double-double-toil-and-trouble-check locking".
|
|
|
|
|
*/
|
|
|
|
|
threadgroup_unlock(tsk);
|
|
|
|
|
put_task_struct(tsk);
|
|
|
|
|
goto retry_find_task;
|
|
|
|
|
}
|
|
|
|
|
ret = cgroup_attach_proc(cgrp, tsk);
|
|
|
|
|
} else
|
|
|
|
|
ret = cgroup_attach_task(cgrp, tsk);
|
|
|
|
|
threadgroup_unlock(tsk);
|
|
|
|
|
|
|
|
|
|
put_task_struct(tsk);
|
|
|
|
|
out_unlock_cgroup:
|
|
|
|
|
cgroup_unlock();
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
@@ -2305,16 +2211,7 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
|
|
|
|
|
|
|
|
|
|
/*
 * Attach the thread group identified by @tgid to @cgrp by calling
 * attach_task_by_pid() with its threadgroup argument set to true.
 * @cft is not referenced in the body shown here.
 *
 * Returns whatever attach_task_by_pid() returns.
 *
 * NOTE(review): as shown, the statement following "return ret;" can never
 * execute. The retry loop and the direct return look like the two sides of
 * a patch hunk rendered together - confirm which form is intended.
 */
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
|
|
|
|
|
{
|
|
|
|
|
int ret;
|
|
|
|
|
do {
|
|
|
|
|
/*
|
|
|
|
|
* attach_proc fails with -EAGAIN if threadgroup leadership
|
|
|
|
|
* changes in the middle of the operation, in which case we need
|
|
|
|
|
* to find the task_struct for the new leader and start over.
|
|
|
|
|
*/
|
|
|
|
|
ret = attach_task_by_pid(cgrp, tgid, true);
|
|
|
|
|
} while (ret == -EAGAIN);
|
|
|
|
|
return ret;
|
|
|
|
|
return attach_task_by_pid(cgrp, tgid, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@@ -2804,15 +2701,20 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
|
|
|
|
|
* using their cgroups capability, we don't maintain the lists running
|
|
|
|
|
* through each css_set to its tasks until we see the list actually
|
|
|
|
|
* used - in other words after the first call to cgroup_iter_start().
|
|
|
|
|
*
|
|
|
|
|
* The tasklist_lock is not held here, as do_each_thread() and
|
|
|
|
|
* while_each_thread() are protected by RCU.
|
|
|
|
|
*/
|
|
|
|
|
static void cgroup_enable_task_cg_lists(void)
|
|
|
|
|
{
|
|
|
|
|
struct task_struct *p, *g;
|
|
|
|
|
write_lock(&css_set_lock);
|
|
|
|
|
use_task_css_set_links = 1;
|
|
|
|
|
/*
|
|
|
|
|
* We need tasklist_lock because RCU is not safe against
|
|
|
|
|
* while_each_thread(). Besides, a forking task that has passed
|
|
|
|
|
* cgroup_post_fork() without seeing use_task_css_set_links = 1
|
|
|
|
|
* is not guaranteed to have its child immediately visible in the
|
|
|
|
|
* tasklist if we walk through it with RCU.
|
|
|
|
|
*/
|
|
|
|
|
read_lock(&tasklist_lock);
|
|
|
|
|
do_each_thread(g, p) {
|
|
|
|
|
task_lock(p);
|
|
|
|
|
/*
|
|
|
|
@@ -2824,6 +2726,7 @@ static void cgroup_enable_task_cg_lists(void)
|
|
|
|
|
list_add(&p->cg_list, &p->cgroups->tasks);
|
|
|
|
|
task_unlock(p);
|
|
|
|
|
} while_each_thread(g, p);
|
|
|
|
|
read_unlock(&tasklist_lock);
|
|
|
|
|
write_unlock(&css_set_lock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -3043,6 +2946,38 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* which pidlist file are we talking about? */
|
|
|
|
|
enum cgroup_filetype {
|
|
|
|
|
/* the pidlist backs the "procs" file */
CGROUP_FILE_PROCS,
|
|
|
|
|
/* the pidlist backs the "tasks" file */
CGROUP_FILE_TASKS,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* A pidlist is a list of pids that virtually represents the contents of one
|
|
|
|
|
* of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
|
|
|
|
|
* a pair (one each for procs, tasks) for each pid namespace that's relevant
|
|
|
|
|
* to the cgroup.
|
|
|
|
|
*/
|
|
|
|
|
struct cgroup_pidlist {
|
|
|
|
|
/*
|
|
|
|
|
* used to find which pidlist is wanted. doesn't change as long as
|
|
|
|
|
* this particular list stays in the list.
|
|
|
|
|
*/
|
|
|
|
|
/* lookup key: which file (procs or tasks) and which pid namespace */
struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
|
|
|
|
|
/* array of ids (pid_t values); NOTE(review): presumably tgids or pids depending on key.type - confirm */
|
|
|
|
|
pid_t *list;
|
|
|
|
|
/* number of elements in the list array above */
|
|
|
|
|
int length;
|
|
|
|
|
/* how many files are currently using this array */
|
|
|
|
|
int use_count;
|
|
|
|
|
/* node linking this pidlist into the list kept by its cgroup */
|
|
|
|
|
struct list_head links;
|
|
|
|
|
/* pointer to the cgroup we belong to, for list removal purposes */
|
|
|
|
|
struct cgroup *owner;
|
|
|
|
|
/* protects the other fields (a reader/writer semaphore, despite the name) */
|
|
|
|
|
struct rw_semaphore mutex;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The following two functions "fix" the issue where there are more pids
|
|
|
|
|
* than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
|
|
|
|
@@ -3827,7 +3762,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
|
|
|
set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
|
|
|
|
|
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
struct cgroup_subsys_state *css = ss->create(ss, cgrp);
|
|
|
|
|
struct cgroup_subsys_state *css = ss->create(cgrp);
|
|
|
|
|
|
|
|
|
|
if (IS_ERR(css)) {
|
|
|
|
|
err = PTR_ERR(css);
|
|
|
|
@@ -3841,7 +3776,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
|
|
|
}
|
|
|
|
|
/* At error, ->destroy() callback has to free assigned ID. */
|
|
|
|
|
if (clone_children(parent) && ss->post_clone)
|
|
|
|
|
ss->post_clone(ss, cgrp);
|
|
|
|
|
ss->post_clone(cgrp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cgroup_lock_hierarchy(root);
|
|
|
|
@@ -3875,7 +3810,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
|
|
|
|
|
|
|
|
|
|
for_each_subsys(root, ss) {
|
|
|
|
|
if (cgrp->subsys[ss->subsys_id])
|
|
|
|
|
ss->destroy(ss, cgrp);
|
|
|
|
|
ss->destroy(cgrp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
@@ -4099,7 +4034,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
|
|
|
|
/* Create the top cgroup state for this subsystem */
|
|
|
|
|
list_add(&ss->sibling, &rootnode.subsys_list);
|
|
|
|
|
ss->root = &rootnode;
|
|
|
|
|
css = ss->create(ss, dummytop);
|
|
|
|
|
css = ss->create(dummytop);
|
|
|
|
|
/* We don't handle early failures gracefully */
|
|
|
|
|
BUG_ON(IS_ERR(css));
|
|
|
|
|
init_cgroup_css(css, ss, dummytop);
|
|
|
|
@@ -4188,7 +4123,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
|
|
|
* no ss->create seems to need anything important in the ss struct, so
|
|
|
|
|
* this can happen first (i.e. before the rootnode attachment).
|
|
|
|
|
*/
|
|
|
|
|
css = ss->create(ss, dummytop);
|
|
|
|
|
css = ss->create(dummytop);
|
|
|
|
|
if (IS_ERR(css)) {
|
|
|
|
|
/* failure case - need to deassign the subsys[] slot. */
|
|
|
|
|
subsys[i] = NULL;
|
|
|
|
@@ -4206,7 +4141,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
|
|
|
|
|
int ret = cgroup_init_idr(ss, css);
|
|
|
|
|
if (ret) {
|
|
|
|
|
dummytop->subsys[ss->subsys_id] = NULL;
|
|
|
|
|
ss->destroy(ss, dummytop);
|
|
|
|
|
ss->destroy(dummytop);
|
|
|
|
|
subsys[i] = NULL;
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
|
return ret;
|
|
|
|
@@ -4304,7 +4239,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
|
|
|
|
|
* pointer to find their state. note that this also takes care of
|
|
|
|
|
* freeing the css_id.
|
|
|
|
|
*/
|
|
|
|
|
ss->destroy(ss, dummytop);
|
|
|
|
|
ss->destroy(dummytop);
|
|
|
|
|
dummytop->subsys[ss->subsys_id] = NULL;
|
|
|
|
|
|
|
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
@@ -4580,7 +4515,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
|
|
|
|
|
for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
|
|
if (ss->fork)
|
|
|
|
|
ss->fork(ss, child);
|
|
|
|
|
ss->fork(child);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@@ -4596,6 +4531,17 @@ void cgroup_fork_callbacks(struct task_struct *child)
|
|
|
|
|
*/
|
|
|
|
|
void cgroup_post_fork(struct task_struct *child)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* use_task_css_set_links is set to 1 before we walk the tasklist
|
|
|
|
|
* under the tasklist_lock and we read it here after we added the child
|
|
|
|
|
* to the tasklist under the tasklist_lock as well. If the child wasn't
|
|
|
|
|
* yet in the tasklist when we walked through it from
|
|
|
|
|
* cgroup_enable_task_cg_lists(), then use_task_css_set_links value
|
|
|
|
|
* should be visible now due to the paired locking and barriers implied
|
|
|
|
|
* by LOCK/UNLOCK: it is written before the tasklist_lock unlock
|
|
|
|
|
* in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
|
|
|
|
|
* lock on fork.
|
|
|
|
|
*/
|
|
|
|
|
if (use_task_css_set_links) {
|
|
|
|
|
write_lock(&css_set_lock);
|
|
|
|
|
if (list_empty(&child->cg_list)) {
|
|
|
|
@@ -4682,7 +4628,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
|
|
|
|
|
struct cgroup *old_cgrp =
|
|
|
|
|
rcu_dereference_raw(cg->subsys[i])->cgroup;
|
|
|
|
|
struct cgroup *cgrp = task_cgroup(tsk, i);
|
|
|
|
|
ss->exit(ss, cgrp, old_cgrp, tsk);
|
|
|
|
|
ss->exit(cgrp, old_cgrp, tsk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@@ -5137,8 +5083,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_DEBUG
|
|
|
|
|
static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
|
|
|
|
|
struct cgroup *cont)
|
|
|
|
|
static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
|
|
|
|
|
{
|
|
|
|
|
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
|
|
|
|
|
|
|
|
|
@@ -5148,7 +5093,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
|
|
|
|
|
return css;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Release the debug subsystem's state for @cont by kfree()ing the object
 * stored at cont->subsys[debug_subsys_id].
 *
 * NOTE(review): two signature lines appear back to back here (with and
 * without the struct cgroup_subsys argument). They look like the before and
 * after sides of a patch hunk - confirm which one the file should carry.
 */
static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
|
|
|
|
static void debug_destroy(struct cgroup *cont)
|
|
|
|
|
{
|
|
|
|
|
kfree(cont->subsys[debug_subsys_id]);
|
|
|
|
|
}
|
|
|
|
|