Tuesday 21 April 2015

Linux spinlock implementation on x86_64 (ticketing and lock prefix)


Let's look at the spin_lock implementation in kernel 3.10.0 on an x86_64 machine.

In include/linux/spinlock.h we have the definition of spin_lock

static inline void spin_lock(spinlock_t *lock)
{
	raw_spin_lock(&lock->rlock);
}

#define raw_spin_lock(lock) _raw_spin_lock(lock)

kernel/spinlock.c: 136

void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
	__raw_spin_lock(lock);
}
EXPORT_SYMBOL(_raw_spin_lock);
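
(For context, a caller uses this API roughly as in the sketch below; my_lock, my_data and update_my_data are illustrative names for this example, not kernel symbols.)

/* #include <linux/spinlock.h> */

static DEFINE_SPINLOCK(my_lock);
static int my_data;

static void update_my_data(int val)
{
	spin_lock(&my_lock);		/* take the ticket lock */
	my_data = val;			/* critical section */
	spin_unlock(&my_lock);		/* release: the next ticket holder may enter */
}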

From there the call chain ends up in the architecture-specific implementation in arch/x86/include/asm/spinlock.h: 87
static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };

	/* atomically take a ticket: bump tail, get back the old head/tail pair */
	inc = xadd(&lock->tickets, inc);
	if (likely(inc.head == inc.tail))
		goto out;	/* uncontended: the lock is ours immediately */

	inc.tail &= ~TICKET_SLOWPATH_FLAG;
	for (;;) {
		unsigned count = SPIN_THRESHOLD;

		/* spin until head reaches our ticket */
		do {
			if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
				goto out;
			cpu_relax();
		} while (--count);
		__ticket_lock_spinning(lock, inc.tail);
	}
out:	barrier();	/* make sure nothing creeps before the lock is taken */
}
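
To make sense of the xadd on &lock->tickets it helps to keep the ticket layout in mind. Roughly (simplified from arch/x86/include/asm/spinlock_types.h; the ticket width depends on CONFIG_NR_CPUS):

typedef struct arch_spinlock {
	union {
		__ticketpair_t head_tail;		/* both tickets viewed as one word */
		struct __raw_tickets {
			__ticket_t head;		/* ticket currently being served */
			__ticket_t tail;		/* next ticket to hand out */
		} tickets;
	};
} arch_spinlock_t;

The xadd atomically bumps tail (hands us a ticket) and returns the previous head/tail pair; if the old head equals our ticket the lock was free, otherwise we spin until head catches up.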

Looking at the implementation of arch_spin_lock we can see that ticket spinlocks are used to share the lock fairly among the CPUs: each CPU takes a ticket and waits for its turn.
More about the ticket lock can be found here: http://en.wikipedia.org/wiki/Ticket_lock
The pseudocode for Lock and Unlock looks like this:
 record locktype {
    int ticketnumber
    int turn
 }
 procedure LockInit( locktype* lock ) {
    lock.ticketnumber := 0
    lock.turn := 0
 }
 procedure Lock( locktype* lock ) {
    int myturn := FetchAndIncrement( &lock.ticketnumber )
    while lock.turn ≠ myturn
        skip // spin until lock is acquired
 }
 procedure UnLock( locktype* lock ) {
    FetchAndIncrement( &lock.turn )
 }
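
The same idea as a small, runnable user-space sketch in C (illustrative only: it uses C11 atomics rather than the kernel's primitives, and the names ticket_lock, ticket_lock_acquire and ticket_lock_release are made up for this example):

#include <stdatomic.h>

struct ticket_lock {
	atomic_uint ticketnumber;	/* next ticket to hand out */
	atomic_uint turn;		/* ticket currently allowed in */
};

static void ticket_lock_acquire(struct ticket_lock *lock)
{
	/* FetchAndIncrement: take a ticket */
	unsigned int myturn = atomic_fetch_add(&lock->ticketnumber, 1);

	/* spin until it is our turn (a real lock would add a pause/cpu_relax hint) */
	while (atomic_load(&lock->turn) != myturn)
		;
}

static void ticket_lock_release(struct ticket_lock *lock)
{
	/* let the next ticket holder in */
	atomic_fetch_add(&lock->turn, 1);
}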

Also, let us look at the xadd() used above: xadd adds "inc" to *ptr and atomically returns the previous value of *ptr.
Note the "lock" prefix used in xadd. This lock prefix provides the synchronization when multiple CPUs want to acquire the lock at the same time.
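
A simplified sketch of what xadd boils down to for a 4-byte operand (the real macro, in arch/x86/include/asm/cmpxchg.h, dispatches on sizeof(*ptr) and uses LOCK_PREFIX; xadd_u32 here is just an illustrative name):

static inline unsigned int xadd_u32(unsigned int *ptr, unsigned int inc)
{
	/*
	 * LOCK-prefixed XADD: *ptr += inc, and the previous value of
	 * *ptr is written back into the source register (inc).
	 */
	asm volatile("lock; xaddl %0, %1"
		     : "+r" (inc), "+m" (*ptr)
		     :
		     : "memory", "cc");
	return inc;	/* previous value of *ptr */
}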

More details about the lock prefix:
http://www.codemaestro.com/reviews/8
All x86 CPUs are equipped with the ability to lock a specific memory address, preventing other system buses from reading or modifying it while the following instruction runs. The LOCK prefix to an assembly instruction causes the CPU to assert the LOCK# signal, practically ensuring exclusive use of the memory address in multiprocessor / multi-threaded environments.
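
A quick way to see why this matters: without atomicity, concurrent increments lose updates. The sketch below (plain user-space C with pthreads; it relies on GCC's __atomic builtins, which compile to a LOCK-prefixed instruction on x86_64) contrasts a plain increment with an atomic one:

#include <pthread.h>
#include <stdio.h>

#define ITERS 1000000

static int plain_counter;
static int atomic_counter;

static void *worker(void *arg)
{
	for (int i = 0; i < ITERS; i++) {
		plain_counter++;	/* not atomic: updates can be lost */
		__atomic_fetch_add(&atomic_counter, 1,
				   __ATOMIC_RELAXED);	/* emits a LOCK-prefixed add/xadd */
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, NULL);
	pthread_create(&t2, NULL, worker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* plain_counter usually ends up below 2*ITERS; atomic_counter is exactly 2*ITERS */
	printf("plain=%d atomic=%d\n", plain_counter, atomic_counter);
	return 0;
}

Build with something like: gcc -O2 -pthread demo.c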

If we disassemble the function _raw_spin_lock, we can see the lock prefix used with the xadd:

kernel/spinlock.c: 136
0xffffffff815e90b0 <_raw_spin_lock>:    data32 data32 data32 xchg %ax,%ax
0xffffffff815e90b5 <_raw_spin_lock+5>:  push   %rbp
0xffffffff815e90b6 <_raw_spin_lock+6>:  mov    %rsp,%rbp
arch/x86/include/asm/spinlock.h: 87
0xffffffff815e90b9 <_raw_spin_lock+9>:  mov    $0x20000,%eax
0xffffffff815e90be <_raw_spin_lock+14>: lock xadd %eax,(%rdi)
0xffffffff815e90c2 <_raw_spin_lock+18>: mov    %eax,%edx
0xffffffff815e90c4 <_raw_spin_lock+20>: shr    $0x10,%edx
arch/x86/include/asm/spinlock.h: 88
0xffffffff815e90c7 <_raw_spin_lock+23>: cmp    %ax,%dx
0xffffffff815e90ca <_raw_spin_lock+26>: jne    0xffffffff815e90ce <_raw_spin_lock+30>
kernel/spinlock.c: 138
0xffffffff815e90cc <_raw_spin_lock+28>: pop    %rbp
0xffffffff815e90cd <_raw_spin_lock+29>: retq  
/usr/src/debug/kernel-3.10.0-123.el7/linux-3.10.0-123.el7.x86_64/arch/x86/include/asm/spinlock.h: 91
0xffffffff815e90ce <_raw_spin_lock+30>: and    $0xfffffffe,%edx
arch/x86/include/asm/paravirt.h: 718
0xffffffff815e90d1 <_raw_spin_lock+33>: movzwl %dx,%esi
kernel/spinlock.c: 136
0xffffffff815e90d4 <_raw_spin_lock+36>: mov    $0x8000,%eax
0xffffffff815e90d9 <_raw_spin_lock+41>: jmp    0xffffffff815e90e7 <_raw_spin_lock+55>
0xffffffff815e90db <_raw_spin_lock+43>: nopl   0x0(%rax,%rax,1)
arch/x86/include/asm/processor.h: 661
0xffffffff815e90e0 <_raw_spin_lock+48>: pause
arch/x86/include/asm/spinlock.h: 99
0xffffffff815e90e2 <_raw_spin_lock+50>: sub    $0x1,%eax
0xffffffff815e90e5 <_raw_spin_lock+53>: je     0xffffffff815e90f1 <_raw_spin_lock+65>
arch/x86/include/asm/spinlock.h: 96
0xffffffff815e90e7 <_raw_spin_lock+55>: movzwl (%rdi),%ecx
0xffffffff815e90ea <_raw_spin_lock+58>: cmp    %cx,%dx
0xffffffff815e90ed <_raw_spin_lock+61>: jne    0xffffffff815e90e0 <_raw_spin_lock+48>
kernel/spinlock.c: 138
0xffffffff815e90ef <_raw_spin_lock+63>: pop    %rbp
0xffffffff815e90f0 <_raw_spin_lock+64>: retq  
arch/x86/include/asm/paravirt.h: 718
0xffffffff815e90f1 <_raw_spin_lock+65>: data32 data32 xchg %ax,%ax
0xffffffff815e90f5 <_raw_spin_lock+69>: data32 xchg %ax,%ax
0xffffffff815e90f8 <_raw_spin_lock+72>: jmp    0xffffffff815e90d4 <_raw_spin_lock+36>
0xffffffff815e90fa <_raw_spin_lock+74>: nopw   0x0(%rax,%rax,1)
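
Reading the listing against the C source (a rough mapping; it assumes TICKET_LOCK_INC == 2, i.e. paravirt ticketlocks are enabled in this build, and SPIN_THRESHOLD == 1 << 15):

mov    $0x20000,%eax       # inc = { .tail = TICKET_LOCK_INC }; tail lives in the high 16 bits
lock xadd %eax,(%rdi)      # atomically bump lock->tickets.tail; %eax = old head/tail pair
mov    %eax,%edx
shr    $0x10,%edx          # %dx = our ticket (the old tail)
cmp    %ax,%dx             # fast path: if head == our ticket, the lock is ours
...
and    $0xfffffffe,%edx    # inc.tail &= ~TICKET_SLOWPATH_FLAG
mov    $0x8000,%eax        # count = SPIN_THRESHOLD
...
pause                      # cpu_relax()
...
movzwl (%rdi),%ecx         # re-read lock->tickets.head
cmp    %cx,%dx             # has head reached our ticket yet?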

