On 06/13/2018 11:50 AM, Peter Zijlstra wrote:
>> +	lockdep_assert_held(&lock->wait_lock);
>> +
>> +	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
>> +	    ww_ctx->acquired > 0) {
>> +		WRITE_ONCE(hold_ctx->wounded, true);
>> +
>> +		if (owner != current) {
>> +			/*
>> +			 * wake_up_process() inserts a write memory barrier to
> It does no such thing. But yes, it does ensure the wakee sees all
> prior stores IFF the wakeup happened.
>> +			 * make sure owner sees it is wounded before
>> +			 * TASK_RUNNING in case it's sleeping on another
>> +			 * ww_mutex. Note that owner points to a valid
>> +			 * task_struct as long as we hold the wait_lock.
>> +			 */
> What exactly are you trying to say here?
>
> I'm thinking this is the pairing barrier to the smp_mb() below, with
> your list_empty() thing? Might make sense to write a single coherent
> comment and refer to the other location.
So what I'm trying to say here is that wake_up_process() ensures that the
owner, if in !TASK_RUNNING, sees the write to hold_ctx->wounded before the
transition to TASK_RUNNING. That is how I interpreted "woken up" in the
wake_up_process() documentation.
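Perhaps easiest to illustrate with a sketch of the two sides I have in
mind. This is hand-written for this mail and simplified; the sleeper's
loop really lives in __mutex_lock_common():

	/* Wounding side (__ww_mutex_wound(), wait_lock held): */
	WRITE_ONCE(hold_ctx->wounded, true);
	wake_up_process(owner);	/* if owner is !TASK_RUNNING, the wakeup
				 * guarantees it observes the store above
				 * once it runs again */

	/* Owner side, sleeping on another ww_mutex: */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(ww_ctx->wounded))
			break;		/* back off */
		schedule();	/* if we missed the store, a wakeup is
				 * pending and schedule() returns at once;
				 * we re-check on the next iteration */
	}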
>> +			wake_up_process(owner);
>> +		}
>> +
>> +		return true;
>> +	}
>> +
>> +	return false;
>> +}
>>  /*
>>   * Wake up any waiters that may have to back off when the lock is held by the
>>   * given context.
>>   *
>>   * Due to the invariants on the wait list, this can only affect the first
>> - * waiter with a context.
>> + * waiter with a context, unless the Wound-Wait algorithm is used where
>> + * also subsequent waiters with a context may wound the lock holder.
>>   *
>>   * The current task must not be on the wait list.
>>   */
>> @@ -303,6 +338,7 @@ static void __sched
>>  __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>>  {
>>  	struct mutex_waiter *cur;
>> +	bool is_wait_die = ww_ctx->ww_class->is_wait_die;
>>
>>  	lockdep_assert_held(&lock->wait_lock);
>>
>> @@ -310,13 +346,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>>  		if (!cur->ww_ctx)
>>  			continue;
>>
>> -		if (cur->ww_ctx->acquired > 0 &&
>> +		if (is_wait_die && cur->ww_ctx->acquired > 0 &&
>>  		    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
>>  			debug_mutex_wake_waiter(lock, cur);
>>  			wake_up_process(cur->task);
>>  		}
>>
>> -		break;
>> +		if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
>> +			break;
>>  	}
>>  }
>> @@ -338,12 +375,17 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>>  	 * and keep spinning, or it will acquire wait_lock, add itself
>>  	 * to waiter list and sleep.
>>  	 */
>> -	smp_mb(); /* ^^^ */
>> +	smp_mb(); /* See comments above and below. */
>>  	/*
>> -	 * Check if lock is contended, if not there is nobody to wake up
>> +	 * Check if lock is contended, if not there is nobody to wake up.
>> +	 * Checking MUTEX_FLAG_WAITERS is not enough here,
> That seems like a superfluous thing to say. It makes sense in the
> context of this patch because we change the FLAG check into a list
> check, but the resulting comment/code looks odd.
>> +	 * since we need to
>> +	 * order against the lock->ctx check in __ww_mutex_wound called from
>> +	 * __ww_mutex_add_waiter. We can use list_empty without taking the
>> +	 * wait_lock, given the memory barrier above and the list_empty
>> +	 * documentation.
> I don't trust documentation. Please reason about implementation.
Will do.
>> +	 */
>> -	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
>> +	if (likely(list_empty(&lock->base.wait_list)))
>>  		return;
>> @@ -653,6 +695,17 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
>>  	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
>>  	struct mutex_waiter *cur;
>> +	/*
>> +	 * If we miss a wounded == true here, we will have a pending
> Explain how we can miss that.
This is actually the pairing location of the wake_up_process() comment /
code discussed above. Here we should have !TASK_RUNNING, and let's say
ctx->wounded is set by another process immediately after we've read it
(we "miss" it). At that point there must be a pending wake_up_process()
for us, and we'll pick up the set value of wounded on the next iteration
after returning from schedule().
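As a timeline (my own shorthand, not from the patch):

	/*
	 * CPU0 (this waiter)              CPU1 (wounding process)
	 *
	 * set_current_state(!TASK_RUNNING);
	 * if (!READ_ONCE(ctx->wounded))   WRITE_ONCE(ctx->wounded, true);
	 *         schedule(); <- "miss"   wake_up_process(waiter);
	 */

Either the wakeup arrives before schedule(), in which case we are
TASK_RUNNING again and schedule() falls through at once, or it arrives
while we sleep and wakes us. In both cases we loop back into
__ww_mutex_lock_check_stamp() and now observe wounded == true.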
>> +	 * TASK_RUNNING and pick it up on the next schedule fall-through.
>> +	 */
>> +	if (!ctx->ww_class->is_wait_die) {
>> +		if (READ_ONCE(ctx->wounded))
>> +			goto deadlock;
>> +		else
>> +			return 0;
>> +	}
>> +
>>  	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
>>  		goto deadlock;
>> @@ -683,12 +736,15 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>>  {
>>  	struct mutex_waiter *cur;
>>  	struct list_head *pos;
>> +	bool is_wait_die;
>>
>>  	if (!ww_ctx) {
>>  		list_add_tail(&waiter->list, &lock->wait_list);
>>  		return 0;
>>  	}
>>
>> +	is_wait_die = ww_ctx->ww_class->is_wait_die;
>> +
>>  	/*
>>  	 * Add the waiter before the first waiter with a higher stamp.
>>  	 * Waiters without a context are skipped to avoid starving
>> @@ -701,7 +757,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>>  		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
>>  			/* Back off immediately if necessary. */
>> -			if (ww_ctx->acquired > 0) {
>> +			if (is_wait_die && ww_ctx->acquired > 0) {
>>  #ifdef CONFIG_DEBUG_MUTEXES
>>  				struct ww_mutex *ww;
>> @@ -721,13 +777,26 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>>  		 * Wake up the waiter so that it gets a chance to back
>>  		 * off.
>>  		 */
>> -		if (cur->ww_ctx->acquired > 0) {
>> +		if (is_wait_die && cur->ww_ctx->acquired > 0) {
>>  			debug_mutex_wake_waiter(lock, cur);
>>  			wake_up_process(cur->task);
>>  		}
>>  	}
>>  	list_add_tail(&waiter->list, pos);
>> +
>> +	if (!is_wait_die) {
>> +		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
>> +
>> +		/*
>> +		 * Make sure a racing lock taker sees a non-empty waiting list
>> +		 * before we read ww->ctx, so that if we miss ww->ctx, the
>> +		 * racing lock taker will call __ww_mutex_wakeup_for_backoff()
>> +		 * and wound itself.
>> +		 */
>> +		smp_mb();
>> +		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
>> +	}
>> +
>>  	return 0;
>>  }
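To spell out the pairing with the smp_mb() in
ww_mutex_set_context_fastpath() that you asked about (my shorthand here,
to be folded into a single coherent comment as you suggest):

	/*
	 * __ww_mutex_add_waiter()            ww_mutex_set_context_fastpath()
	 *
	 * list_add_tail(&waiter->list, pos); ww->ctx = ww_ctx;
	 * smp_mb();                          smp_mb();
	 * r0 = READ_ONCE(ww->ctx);           r1 = list_empty(&lock->base.wait_list);
	 */

This is the classic store-buffering pattern: with a full barrier on both
sides, r0 == NULL and r1 == true cannot both happen. Either the new
waiter sees ww->ctx and wounds the holder, or the fast-path taker sees a
non-empty list and calls __ww_mutex_wakeup_for_backoff().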
>> @@ -750,6 +819,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>>  	if (use_ww_ctx && ww_ctx) {
>>  		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
>>  			return -EALREADY;
>> +
>> +		/*
>> +		 * Reset the wounded flag after a backoff.
>> +		 * No other process can race and wound us here since they
>> +		 * can't have a valid owner pointer at this time
>> +		 */
>> +		if (ww_ctx->acquired == 0)
>> +			ww_ctx->wounded = false;
>>  	}
>>  	preempt_disable();
>>
>> @@ -858,6 +935,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>>  acquired:
>>  	__set_current_state(TASK_RUNNING);
>>
>> +	/* We stole the lock. Need to check wounded status. */
>> +	if (use_ww_ctx && ww_ctx && !ww_ctx->ww_class->is_wait_die &&
>> +	    !__mutex_waiter_is_first(lock, &waiter))
>> +		__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
>> +
>>  	mutex_remove_waiter(lock, &waiter, current);
>>  	if (likely(list_empty(&lock->wait_list)))
>>  		__mutex_clear_flag(lock, MUTEX_FLAGS);
> I can't say I'm a fan. I'm already cursing the ww_mutex stuff every
> time I have to look at it, and you just made it worse spaghetti.
Thanks for the review.
Well, I can't speak for the current ww implementation, except that I didn't
think it was too hard to understand for a first-time reader.
Admittedly the Wound-Wait path makes it worse, since it's a preemptive
algorithm and we need to touch other processes' acquire contexts and worry
about ordering.
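For what it's worth, the conceptual difference boils down to something
like this (pseudo-code, identifiers invented for this mail):

	if (is_wait_die) {
		/* Wait-Die: a younger transaction that would wait for a
		 * lock held by an older one backs off ("dies") and
		 * retries; only its own acquire context is ever touched. */
		if (stamp_after(requester_ctx, holder_ctx))	/* requester younger */
			return -EDEADLK;
	} else {
		/* Wound-Wait: an older requester preemptively wounds a
		 * younger lock holder, writing into *another* process'
		 * acquire context and possibly waking it up -- hence the
		 * ordering headaches above. */
		if (stamp_after(holder_ctx, requester_ctx))	/* holder younger */
			WRITE_ONCE(holder_ctx->wounded, true);
	}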
So, assuming your review comments are fixed up, is that a solid NAK, or do
you have any suggestion that would make you more comfortable with the code?
Like splitting out the ww-stuff to a separate file?
/Thomas