Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org --- drivers/staging/greybus/loopback.c | 1 + drivers/staging/greybus/operation.c | 50 ++++++++++++++++++++++++++----------- drivers/staging/greybus/operation.h | 2 ++ 3 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c index 6c2a41c638c3..4bee33f62fd4 100644 --- a/drivers/staging/greybus/loopback.c +++ b/drivers/staging/greybus/loopback.c @@ -629,6 +629,7 @@ static int gb_loopback_async_operation(struct gb_loopback *gb, int type, mutex_lock(&gb->mutex); ret = gb_operation_request_send(operation, gb_loopback_async_operation_callback, + 0, GFP_KERNEL); if (ret) goto error; diff --git a/drivers/staging/greybus/operation.c b/drivers/staging/greybus/operation.c index 0123109a1070..3023012808d9 100644 --- a/drivers/staging/greybus/operation.c +++ b/drivers/staging/greybus/operation.c @@ -273,18 +273,40 @@ static void gb_operation_request_handle(struct gb_operation *operation) static void gb_operation_work(struct work_struct *work) { struct gb_operation *operation; + int ret;
operation = container_of(work, struct gb_operation, work);
- if (gb_operation_is_incoming(operation)) + if (gb_operation_is_incoming(operation)) { gb_operation_request_handle(operation); - else + } else { + ret = del_timer_sync(&operation->timer); + if (!ret) { + /* Cancel request message if scheduled by timeout. */ + if (gb_operation_result(operation) == -ETIMEDOUT) + gb_message_cancel(operation->request); + } + operation->callback(operation); + }
gb_operation_put_active(operation); gb_operation_put(operation); }
+static void gb_operation_timeout(unsigned long arg) +{ + struct gb_operation *operation = (void *)arg; + + if (gb_operation_result_set(operation, -ETIMEDOUT)) { + /* + * A stuck request message will be cancelled from the + * workqueue. + */ + queue_work(gb_operation_completion_wq, &operation->work); + } +} + static void gb_operation_message_init(struct gb_host_device *hd, struct gb_message *message, u16 operation_id, size_t payload_size, u8 type) @@ -518,6 +540,9 @@ gb_operation_create_common(struct gb_connection *connection, u8 type, gfp_flags)) { goto err_request; } + + setup_timer(&operation->timer, gb_operation_timeout, + (unsigned long)operation); }
operation->flags = op_flags; @@ -679,6 +704,7 @@ static void gb_operation_sync_callback(struct gb_operation *operation) * gb_operation_request_send() - send an operation request message * @operation: the operation to initiate * @callback: the operation completion callback + * @timeout: operation timeout in milliseconds, or zero for no timeout * @gfp: the memory flags to use for any allocations * * The caller has filled in any payload so the request message is ready to go. @@ -693,6 +719,7 @@ static void gb_operation_sync_callback(struct gb_operation *operation) */ int gb_operation_request_send(struct gb_operation *operation, gb_operation_callback callback, + unsigned int timeout, gfp_t gfp) { struct gb_connection *connection = operation->connection; @@ -742,6 +769,11 @@ int gb_operation_request_send(struct gb_operation *operation, if (ret) goto err_put_active;
+ if (timeout) { + operation->timer.expires = jiffies + msecs_to_jiffies(timeout); + add_timer(&operation->timer); + } + return 0;
err_put_active: @@ -763,26 +795,16 @@ int gb_operation_request_send_sync_timeout(struct gb_operation *operation, unsigned int timeout) { int ret; - unsigned long timeout_jiffies;
ret = gb_operation_request_send(operation, gb_operation_sync_callback, - GFP_KERNEL); + timeout, GFP_KERNEL); if (ret) return ret;
- if (timeout) - timeout_jiffies = msecs_to_jiffies(timeout); - else - timeout_jiffies = MAX_SCHEDULE_TIMEOUT; - - ret = wait_for_completion_interruptible_timeout(&operation->completion, - timeout_jiffies); + ret = wait_for_completion_interruptible(&operation->completion); if (ret < 0) { /* Cancel the operation if interrupted */ gb_operation_cancel(operation, -ECANCELED); - } else if (ret == 0) { - /* Cancel the operation if op timed out */ - gb_operation_cancel(operation, -ETIMEDOUT); }
return gb_operation_result(operation); diff --git a/drivers/staging/greybus/operation.h b/drivers/staging/greybus/operation.h index de09a2c7de54..7529f01b2529 100644 --- a/drivers/staging/greybus/operation.h +++ b/drivers/staging/greybus/operation.h @@ -98,6 +98,7 @@ struct gb_operation { struct work_struct work; gb_operation_callback callback; struct completion completion; + struct timer_list timer;
struct kref kref; atomic_t waiters; @@ -164,6 +165,7 @@ bool gb_operation_response_alloc(struct gb_operation *operation,
int gb_operation_request_send(struct gb_operation *operation, gb_operation_callback callback, + unsigned int timeout, gfp_t gfp); int gb_operation_request_send_sync_timeout(struct gb_operation *operation, unsigned int timeout);
On 23/01/17 12:04, Johan Hovold wrote:
+static void gb_operation_timeout(unsigned long arg) +{
- struct gb_operation *operation = (void *)arg;
- if (gb_operation_result_set(operation, -ETIMEDOUT)) {
/*
* A stuck request message will be cancelled from the
* workqueue.
*/
queue_work(gb_operation_completion_wq, &operation->work);
- }
+}
If queue_work fails, you will never wake up the waiter.
- ret = wait_for_completion_interruptible_timeout(&operation->completion,
timeout_jiffies);
- ret = wait_for_completion_interruptible(&operation->completion);
Note, that's a case I explicitly handle (failure to queue the work) in the async set I sent out.
On gbsim at any rate, with a sufficient number of outstanding operations in-flight it's possible to cause queue_work() to fail. Since you're in a timer callback you're atomic so you can't call gb_operation_cancel() here directly, those two reasons are why I did it in a work-queue as opposed to a timer.
That's why I trapped that error at the send() stage of the operation - because testing it out on gbsim showed messages getting lost @ the queue_work stage.
--- bod
On Mon, Jan 23, 2017 at 02:32:48PM +0000, Bryan O'Donoghue wrote:
On 23/01/17 12:04, Johan Hovold wrote:
+static void gb_operation_timeout(unsigned long arg) +{
- struct gb_operation *operation = (void *)arg;
- if (gb_operation_result_set(operation, -ETIMEDOUT)) {
/*
* A stuck request message will be cancelled from the
* workqueue.
*/
queue_work(gb_operation_completion_wq, &operation->work);
- }
+}
If queue_work fails, you will never wake up the waiter.
How could queue_work fail here? The operation result is always set exactly once before being queued so this is fine.
- ret = wait_for_completion_interruptible_timeout(&operation->completion,
timeout_jiffies);
- ret = wait_for_completion_interruptible(&operation->completion);
Note, that's a case I explicitly handle (failure to queue the work) in the async set I sent out.
Yep, I noticed that, but that was for queueing your timeout work at submission. And you queued unconditionally, so you could potentially fail to queue if an operation was submitted twice, even if that would in itself be a driver bug.
On gbsim at any rate, with a sufficient number of outstanding operations in-flight it's possible to cause queue_work() to fail. Since you're in a timer callback you're atomic so you can't call gb_operation_cancel() here directly, those two reasons are why I did it in a work-queue as opposed to a timer.
Yeah, there are some non-trivial issues to deal with here (connection tear down is another). I'm quite sure you won't be able to see queue_work failing with this patch though.
That's why I trapped that error at the send() stage of the operation - because testing it out on gbsim showed messages getting lost @ the queue_work stage.
Yeah, (with some minor modifications) the patch you ended up submitting would have worked, just not as efficiently.
Thanks, Johan
On 23/01/17 15:13, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 02:32:48PM +0000, Bryan O'Donoghue wrote:
On 23/01/17 12:04, Johan Hovold wrote:
+static void gb_operation_timeout(unsigned long arg) +{
- struct gb_operation *operation = (void *)arg;
- if (gb_operation_result_set(operation, -ETIMEDOUT)) {
/*
* A stuck request message will be cancelled from the
* workqueue.
*/
queue_work(gb_operation_completion_wq, &operation->work);
- }
+}
If queue_work fails, you will never wake up the waiter.
How could queue_work fail here? The operation result is always set exactly once before being queued so this is fine.
Perhaps it relates to a bug elsewhere, though I struggle to see how we could feasibly re-use the work associated with an in-flight gb_message.
Could you trap the result code and BUG_ON()/WARN_ON() for it so that we can debug this a little bit further ?
- ret = wait_for_completion_interruptible_timeout(&operation->completion,
timeout_jiffies);
- ret = wait_for_completion_interruptible(&operation->completion);
Note, that's a case I explicitly handle (failure to queue the work) in the async set I sent out.
Yep, I noticed that, but that was for queueing your timeout work at submission. And you queued unconditionally, so you could potentially fail to queue if an operation was submitted twice, even if that would in itself be a driver bug.
Have you run this through loopback without constraining the number of in-flight operations?
On Mon, Jan 23, 2017 at 03:39:43PM +0000, Bryan O'Donoghue wrote:
On 23/01/17 15:13, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 02:32:48PM +0000, Bryan O'Donoghue wrote:
On 23/01/17 12:04, Johan Hovold wrote:
+static void gb_operation_timeout(unsigned long arg) +{
- struct gb_operation *operation = (void *)arg;
- if (gb_operation_result_set(operation, -ETIMEDOUT)) {
/*
* A stuck request message will be cancelled from the
* workqueue.
*/
queue_work(gb_operation_completion_wq, &operation->work);
- }
+}
If queue_work fails, you will never wake up the waiter.
How could queue_work fail here? The operation result is always set exactly once before being queued so this is fine.
Perhaps it relates to a bug elsewhere, though I struggle to see how we could feasibly re-use the work associated with an in-flight gb_message.
Yeah, and the check you added is even for a different struct delayed_work, so I really don't think we have anything to worry about here.
Could you trap the result code and BUG_ON()/WARN_ON() for it so that we can debug this a little bit further ?
IIUC you've only seen this with some of your unfinished work during development (and for a different work struct), so I don't think we need to add a WARN_ON yet.
- ret = wait_for_completion_interruptible_timeout(&operation->completion,
timeout_jiffies);
- ret = wait_for_completion_interruptible(&operation->completion);
Note, that's a case I explicitly handle (failure to queue the work) in the async set I sent out.
Yep, I noticed that, but that was for queueing your timeout work at submission. And you queued unconditionally, so you could potentially fail to queue if an operation was submitted twice, even if that would in itself be a driver bug.
Have you run this through loopback without constraining the number of in-flight operations?
No, not yet as that would require also your changes to the loopback driver (the loopback driver still has its custom timeout implementation after this patch).
I tested this using gbsim and synchronous operations as my Ara build setup is currently broken.
Perhaps you can rerun the tests you saw the queue_work fail with? Are you using gbsim or real hardware to test with?
Thanks, Johan
On Mon, Jan 23, 2017 at 05:50:23PM +0100, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 03:39:43PM +0000, Bryan O'Donoghue wrote:
Have you run this through loopback without constraining the number of in-flight operations?
No, not yet as that would require also your changes to the loopback driver (the loopback driver still has its custom timeout implementation after this patch).
I tested this using gbsim and synchronous operations as my Ara build setup is currently broken.
Ok, I spent my morning restoring my Ara build setup (java apparently broke after an nss update).
I tested Bryan's loopback cleanups on top of my core-timeout patch and all look fine.
Some stats below for the interested.
Consistently increased throughput, which is not unexpected even if a more controlled benchmark is needed to claim that to be a direct consequence of the more efficient timeout handling (and not at least partly due to noise). Note that the final test with 1000 outstanding async operations shows a 50% throughput increase however.
Bryan, if you could consider my review feedback and respin your last three patches on top of my timeout patch, I think we should be good.
Thanks, Johan
core timeouts + loopback cleanups --------------------------------- arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -p
1970-9-3 22:9:55 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Disabled requests per-sec: min=438, max=443, average=441.816132, jitter=5 ap-throughput B/s: min=68403 max=69198 average=68923.312500 jitter=795 ap-latency usec: min=1801 max=3303 average=2252.021973 jitter=1502 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 4 -p
1970-9-3 22:9:24 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Enabled requests per-sec: min=1556, max=1556, average=1556.902466, jitter=0 ap-throughput B/s: min=242876 max=242876 average=242876.781250 jitter=0 ap-latency usec: min=1548 max=3949 average=2226.724121 jitter=2401 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 4 -p -o 2000
1970-9-3 22:7:35 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 479 async: Enabled requests per-sec: min=889, max=889, average=889.307678, jitter=0 ap-throughput B/s: min=138731 max=138731 average=138732.000000 jitter=0 ap-latency usec: min=1489 max=3937 average=2498.234131 jitter=2448 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 1000 -p -o 500000
1970-9-3 22:13:37 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Enabled requests per-sec: min=2448, max=2448, average=2448.567871, jitter=0 ap-throughput B/s: min=381976 max=381976 average=381976.593750 jitter=0 ap-latency usec: min=1818 max=390546 average=197478.406250 jitter=388728 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
loopback custom timeouts ------------------------ arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -p
1970-9-3 22:18:26 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Disabled requests per-sec: min=399, max=410, average=404.397247, jitter=11 ap-throughput B/s: min=62393 max=64003 average=63085.972656 jitter=1610 ap-latency usec: min=1738 max=4180 average=2456.197021 jitter=2442 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 4 -p
1970-9-3 22:19:45 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Enabled requests per-sec: min=1602, max=1602, average=1602.304810, jitter=0 ap-throughput B/s: min=249959 max=249959 average=249959.546875 jitter=0 ap-latency usec: min=1370 max=12380 average=2295.093994 jitter=11010 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 4 -p -o 2000
1970-9-3 22:20:21 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 500 async: Enabled requests per-sec: min=840, max=840, average=840.522583, jitter=0 ap-throughput B/s: min=131121 max=131121 average=131121.531250 jitter=0 ap-latency usec: min=1439 max=7243 average=2192.704102 jitter=5804 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
arche:/ # gb_loopback_test -i 1000 -s 128 -t sink -x -c 1000 -p -o 500000
1970-9-3 22:21:5 test: sink path: gb_loopback0 size: 128 iterations: 1000 errors: 0 async: Enabled requests per-sec: min=1601, max=1601, average=1601.699097, jitter=0 ap-throughput B/s: min=249865 max=249865 average=249865.062500 jitter=0 ap-latency usec: min=6675 max=271112 average=137154.109375 jitter=264437 apbridge-latency usec: min=0 max=0 average=0.000000 jitter=0 gbphy-latency usec: min=0 max=0 average=0.000000 jitter=0
On Tue, Jan 24, 2017 at 03:57:54PM +0000, Bryan O'Donoghue wrote:
On 24/01/17 15:09, Johan Hovold wrote:
Bryan, if you could consider my review feedback and respin your last three patches on top of my timeout patch, I think we should be good.
Sure thing
Bryan, can I get an Acked-by: or Reviewed-by: for this patch so I can take it?
thanks,
greg k-h
On 27/01/17 09:55, Greg Kroah-Hartman wrote:
On Tue, Jan 24, 2017 at 03:57:54PM +0000, Bryan O'Donoghue wrote:
On 24/01/17 15:09, Johan Hovold wrote:
Bryan, if you could consider my review feedback and respin your last three patches on top of my timeout patch, I think we should be good.
Sure thing
Bryan, can I get an Acked-by: or Reviewed-by: for this patch so I can take it?
thanks,
greg k-h
Yes, sorry Greg, busy flying.
I'll get to this by COB Wednesday GMT.
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
Thanks, Johan
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
Thanks, Johan
Haven't really had time to look at this but, since it's the way you want to do it, I guess go ahead.
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
Thanks, Johan
Haven't really had time to look at this but, since it's the way you want to do it, I guess go ahead.
That's alright. As I pointed out in my review of your patches, using delayed work to unconditionally try to cancel every operation is needlessly inefficient (remember that an operation timing out is also the exceptional case) and you run into problems with synchronisation during connection tear down (and even risk of deadlocks if using a single workqueue).
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
Johan
On 08/02/17 09:43, Johan Hovold wrote:
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
Thanks, Johan
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
I'm just wondering what impact it has instead of wait_event_interruptible() more/less overhead (I suspect more) - and I reckoned you'd not be on for that change - so only made a change on the asynchronous path.
But whatever you're happier with yourself.
On Wed, Feb 08, 2017 at 11:43:57AM +0000, Bryan O'Donoghue wrote:
On 08/02/17 09:43, Johan Hovold wrote:
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
I'm just wondering what impact it has instead of wait_event_interruptible() more/less overhead (I suspect more) - and I reckoned you'd not be on for that change - so only made a change on the asynchronous path.
Yes, your approach with unconditional scheduling of a work struct for every operation was needlessly inefficient and I did not want that for the synchronous path as it would definitely be a regression (even if we would have gone with your patches as an intermediate step for the asynchronous case).
My approach does not suffer from such overhead so can therefore be used as a generic mechanism (there's exactly one timer involved whether we use wait_for_completion_interruptible_timeout or not).
Johan
On 08/02/17 11:55, Johan Hovold wrote:
On Wed, Feb 08, 2017 at 11:43:57AM +0000, Bryan O'Donoghue wrote:
On 08/02/17 09:43, Johan Hovold wrote:
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote:
Add a struct timer_list to struct gb_operation and use that to implement generic operation timeouts.
This simplifies the synchronous operation handling somewhat while also providing a generic timeout mechanism that drivers can use for asynchronous operations.
Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
I'm just wondering what impact it has instead of wait_event_interruptible() more/less overhead (I suspect more) - and I reckoned you'd not be on for that change - so only made a change on the asynchronous path.
Yes, your approach with unconditional scheduling of a work struct for every operation was needlessly inefficient and I did not want that for the synchronous path as it would definitely be a regression (even if we would have gone with your patches as an intermediate step for the asynchronous case).
Yes, I'm wondering what impact switching away from wait_for_completion* in favour of add/cancel timer has for every operation.
It's probably irrelevant.
My approach does not suffer from such overhead so can therefore be used as a generic mechanism (there's exactly one timer involved whether we use wait_for_completion_interruptible_timeout or not).
Hmm yes, I was surprised that the first feedback from you on this was an alternative patch but, since you feel strongly about doing it this way, it's fine with me.
Acked-by: Bryan O'Donoghue pure.logic@nexus-software.ie
On Wed, Feb 08, 2017 at 02:05:15PM +0000, Bryan O'Donoghue wrote:
On 08/02/17 11:55, Johan Hovold wrote:
On Wed, Feb 08, 2017 at 11:43:57AM +0000, Bryan O'Donoghue wrote:
On 08/02/17 09:43, Johan Hovold wrote:
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote:
On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote: > Add a struct timer_list to struct gb_operation and use that to implement > generic operation timeouts. > > This simplifies the synchronous operation handling somewhat while also > providing a generic timeout mechanism that drivers can use for > asynchronous operations. > > Signed-off-by: Johan Hovold johan@kernel.org
Greg,
I believe you can apply this one now. This is the right way to implement operation timeouts, and it is independent of Bryan's patches converting loopback to use the new interface, which can be applied on top when they are ready.
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
I'm just wondering what impact it has instead of wait_event_interruptible() more/less overhead (I suspect more) - and I reckoned you'd not be on for that change - so only made a change on the asynchronous path.
Yes, your approach with unconditional scheduling of a work struct for every operation was needlessly inefficient and I did not want that for the synchronous path as it would definitely be a regression (even if we would have gone with your patches as an intermediate step for the asynchronous case).
Yes, I'm wondering what impact switching away from wait_for_completion* in favour of add/cancel timer has for every operation.
It's probably irrelevant.
I think so, yes. There is still a timer hidden in wait_for_completion_timeout as mentioned below.
My approach does not suffer from such overhead so can therefore be used as a generic mechanism (there's exactly one timer involved whether we use wait_for_completion_interruptible_timeout or not).
Hmm yes, I was surprised that the first feedback from you on this was an alternative patch but, since you feel strongly about doing it this way, it's fine with me.
Why would we go with a suboptimal approach, which in its present form still had issues that were unresolved?
I told you from the start (August?) that I did not like the deferred- work approach and that a proper solution was planned for v2. Project got cancelled, but I now spent some time working out an implementation instead of spending more time on your patch.
Acked-by: Bryan O'Donoghue pure.logic@nexus-software.ie
Thanks.
Johan
On 08/02/17 14:16, Johan Hovold wrote:
On Wed, Feb 08, 2017 at 02:05:15PM +0000, Bryan O'Donoghue wrote:
On 08/02/17 11:55, Johan Hovold wrote:
On Wed, Feb 08, 2017 at 11:43:57AM +0000, Bryan O'Donoghue wrote:
On 08/02/17 09:43, Johan Hovold wrote:
On Tue, Feb 07, 2017 at 02:31:04PM +0000, Bryan O'Donoghue wrote:
On 07/02/17 14:19, Johan Hovold wrote: > On Mon, Jan 23, 2017 at 01:04:14PM +0100, Johan Hovold wrote: >> Add a struct timer_list to struct gb_operation and use that to implement >> generic operation timeouts. >> >> This simplifies the synchronous operation handling somewhat while also >> providing a generic timeout mechanism that drivers can use for >> asynchronous operations. >> >> Signed-off-by: Johan Hovold johan@kernel.org > > Greg, > > I believe you can apply this one now. This is the right way to implement > operation timeouts, and it is independent of Bryan's patches converting > loopback to use the new interface, which can be applied on top when they > are ready.
My approach using a single timer which either times out or is cancelled upon completion is about as efficient as this can get, and therefore also allows the current synchronous-operation implementation to be built on top of this generic mechanism.
I'm just wondering what impact it has instead of wait_event_interruptible() more/less overhead (I suspect more) - and I reckoned you'd not be on for that change - so only made a change on the asynchronous path.
Yes, your approach with unconditional scheduling of a work struct for every operation was needlessly inefficient and I did not want that for the synchronous path as it would definitely be a regression (even if we would have gone with your patches as an intermediate step for the asynchronous case).
Yes, I'm wondering what impact switching away from wait_for_completion* in favour of add/cancel timer has for every operation.
It's probably irrelevant.
I think so, yes. There is still a timer hidden in wait_for_completion_timeout as mentioned below.
My approach does not suffer from such overhead so can therefore be used as a generic mechanism (there's exactly one timer involved whether we use wait_event_interruptible_timer or not).
Hmm yes, I was surprised that the first feedback from you on this was an alternative patch but, since you feel strongly about doing it this way, it's fine with me.
Why would we go with a suboptimal approach, which in its present form still had issues that were unresolved?
I told you from the start (August?) that I did not like the deferred- work approach and that a proper solution was planned for v2.
If you say so. Like I say it was surprising and from my perspective this is the first meaningful feedback from you on this.
Let's not argue - it's acked.
--- bod