[PATCH] lightweight robust futexes: core

Add the core infrastructure for robust futexes: structure definitions, the new
syscalls and the do_exit() based cleanup mechanism.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Ingo Molnar
2006-03-27 01:16:22 -08:00
committed by Linus Torvalds
parent e9056f13bf
commit 0771dfefc9
6 changed files with 279 additions and 1 deletions

View File

@ -1,6 +1,8 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H
#include <linux/sched.h>
/* Second argument to futex syscall */
@ -11,10 +13,103 @@
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
*/
/*
* Per-lock list entry - embedded in user-space locks, somewhere close
* to the futex field. (Note: user-space uses a double-linked list to
* achieve O(1) list add and remove, but the kernel only needs to know
* about the forward link)
*
* NOTE: this structure is part of the syscall ABI, and must not be
* changed.
*/
struct robust_list {
struct robust_list __user *next;
};
/*
* Per-thread list head:
*
* NOTE: this structure is part of the syscall ABI, and must only be
* changed if the change is first communicated with the glibc folks.
* (When an incompatible change is done, we'll increase the structure
* size, which glibc will detect)
*/
struct robust_list_head {
/*
* The head of the list. Points back to itself if empty:
*/
struct robust_list list;
/*
* This relative offset is set by user-space, it gives the kernel
* the relative position of the futex field to examine. This way
* we keep userspace flexible, to freely shape its data-structure,
* without hardcoding any particular offset into the kernel:
*/
long futex_offset;
/*
* The death of the thread may race with userspace setting
* up a lock's links. So to handle this race, userspace first
* sets this field to the address of the to-be-taken lock,
* then does the lock acquire, and then adds itself to the
* list, and then clears this field. Hence the kernel will
* always have full knowledge of all locks that the thread
* _might_ have taken. We check the owner TID in any case,
* so only truly owned locks will be handled.
*/
struct robust_list __user *list_op_pending;
};
/*
* Are there any waiters for this robust futex:
*/
#define FUTEX_WAITERS 0x80000000
/*
* The kernel signals via this bit that a thread holding a futex
* has exited without unlocking the futex. The kernel also does
* a FUTEX_WAKE on such futexes, after setting the bit, to wake
* up any possible waiters:
*/
#define FUTEX_OWNER_DIED 0x40000000
/*
* Reserved bit:
*/
#define FUTEX_OWNER_PENDING 0x20000000
/*
* The rest of the robust-futex field is for the TID:
*/
#define FUTEX_TID_MASK 0x1fffffff
/*
* A limit of one million locks held per thread (!) ought to be enough
* for some time. This also protects against a deliberately circular
* list. Not worth introducing an rlimit for this:
*/
#define ROBUST_LIST_LIMIT 1048576
long do_futex(unsigned long uaddr, int op, int val,
unsigned long timeout, unsigned long uaddr2, int val2,
int val3);
extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr);
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
#else
static inline void exit_robust_list(struct task_struct *curr)
{
}
#endif
#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */
#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */

View File

@ -35,6 +35,7 @@
#include <linux/topology.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
#include <linux/futex.h>
#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
@ -872,6 +873,8 @@ struct task_struct {
int cpuset_mems_generation;
int cpuset_mem_spread_rotor;
#endif
struct robust_list_head __user *robust_list;
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
};

View File

@ -28,7 +28,8 @@
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
/*
* A maximum of 4 million PIDs should be enough for a while:
* A maximum of 4 million PIDs should be enough for a while.
* [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
*/
#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))