Add two new kinds of event for running a memory-bound or an IO-bound load. The "mem" event runs a memory-bound load, and the "iorun" event runs an IO-bound load. By default the IO load is created by writing to /dev/null; a different device or file can be specified with the "io_device" key in the "global" section.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" : 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org --- src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{ + ssize_t ret; + char *buf = buffer[0]; + + while (count != 0) { + ret = write(io_fd, buffer, count); + if (ret == -1) { + perror("write"); + return; + } + count -= ret; + buf += ret; + } +} + +static void memload(unsigned long count) +{ + static unsigned long current = 0; + + while (count > 0) { + unsigned long size; + + if (count > MEM_BUFFER_SIZE) + size = MEM_BUFFER_SIZE; + else + size = count; + + if (size > (MEM_BUFFER_SIZE - current)) + size = MEM_BUFFER_SIZE - current; + + memcpy(buffer[0], buffer[1], size); + count -= size; + current += size; + if (current >= MEM_BUFFER_SIZE) + current -= MEM_BUFFER_SIZE; + } +} + static int run_event(event_data_t *event, int dry_run, unsigned long *perf, unsigned long *duration, rtapp_resource_t *resources) { @@ -196,6 +237,18 @@ static int run_event(event_data_t *event, int dry_run, pthread_mutex_unlock(&(ddata->res.mtx.obj)); break; } + case rtapp_mem: + { + log_debug("mem %d", event->count); + memload(event->count); + } + break; + case rtapp_iorun: + { + log_debug("iorun %d", event->count); + ioload(event->count); + } + break; }
return lock; @@ -488,6 +541,22 @@ int main(int argc, char* argv[])
parse_command_line(argc, argv, &opts);
+ /* allocate memory buffers for memory-bound and IO-bound busy loops */ + buffer[0] = malloc(MEM_BUFFER_SIZE); + buffer[1] = malloc(MEM_BUFFER_SIZE); + if (!buffer[0] || !buffer[1]) { + log_error("Cannot allocate memory buffers for memory-bound and" + "IO-bound busy loops"); + exit(EXIT_FAILURE); + } + + /* open file for IO-bound busy loops */ + io_fd = open(opts.io_device, O_CREAT | O_WRONLY, 0644); + if (io_fd < 0) { + log_error("Cannot open io_device file %s", opts.io_device); + exit(EXIT_FAILURE); + } + /* allocated threads */ nthreads = opts.nthreads; threads = malloc(nthreads * sizeof(pthread_t)); @@ -713,6 +782,11 @@ int main(int argc, char* argv[]) close(ft_data.trace_fd); close(ft_data.marker_fd); } + + close(io_fd); + free(buffer[0]); + free(buffer[1]); + exit(EXIT_SUCCESS);
diff --git a/src/rt-app.h b/src/rt-app.h index d55271f..477d493 100644 --- a/src/rt-app.h +++ b/src/rt-app.h @@ -36,6 +36,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define BUDGET_OVERP 5
+#define MEM_BUFFER_SIZE (4 * 1024 * 1024) + void *thread_body(void *arg);
#endif /* _RT_APP_H_ */ diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index e062f79..f715424 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -320,6 +320,22 @@ parse_thread_event_data(char *name, struct json_object *obj, return; }
+ if (!strncmp(name, "mem", strlen("mem")) || + !strncmp(name, "iorun", strlen("iorun"))) { + if (!json_object_is_type(obj, json_type_int)) + goto unknown_event; + + data->count = json_object_get_int(obj); + + if (!strncmp(name, "mem", strlen("mem"))) + data->type = rtapp_mem; + else + data->type = rtapp_iorun; + + log_info(PIN2 "type %d count %d", data->type, data->count); + return; + } + if (!strncmp(name, "lock", strlen("lock")) || !strncmp(name, "unlock", strlen("unlock"))) {
@@ -493,6 +509,10 @@ obj_is_event(char *name) return rtapp_suspend; if (!strncmp(name, "resume", strlen("resume"))) return rtapp_resume; + if (!strncmp(name, "mem", strlen("mem"))) + return rtapp_mem; + if (!strncmp(name, "iorun", strlen("iorun"))) + return rtapp_iorun;
return 0; } @@ -677,6 +697,7 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logbasename = strdup("rt-app"); opts->ftrace = 0; opts->pi_enabled = 0; + opts->io_device = strdup("/dev/null"); return; }
@@ -719,6 +740,8 @@ parse_global(struct json_object *global, rtapp_options_t *opts) TRUE, "rt-app"); opts->ftrace = get_bool_value_from(global, "ftrace", TRUE, 0); opts->pi_enabled = get_bool_value_from(global, "pi_enabled", TRUE, 0); + opts->io_device = get_string_value_from(global, "io_device", TRUE, + "/dev/null");
}
diff --git a/src/rt-app_types.h b/src/rt-app_types.h index cefb3a6..3a7c231 100644 --- a/src/rt-app_types.h +++ b/src/rt-app_types.h @@ -65,6 +65,8 @@ typedef enum resource_t rtapp_timer, rtapp_suspend, rtapp_resume, + rtapp_mem, + rtapp_iorun, } resource_t;
struct _rtapp_mutex { @@ -103,6 +105,7 @@ typedef struct _event_data_t { int res; int dep; int duration; + int count; } event_data_t;
typedef struct _phase_data_t { @@ -165,6 +168,7 @@ typedef struct _rtapp_options_t {
int ftrace; int die_on_dmiss; + char *io_device; } rtapp_options_t;
typedef struct _timing_point_t {
On Tue, 2014-12-02 at 15:21 +0800, pi-cheng.chen wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Wouldn't it be better if we could specify the size of the accessed memory region instead of an iteration count? We can use phases to create an arbitrary number of iterations for memory access.
Thanks, Ivan
On 2 December 2014 at 19:02, Ivan T. Ivanov iivanov@mm-sol.com wrote:
On Tue, 2014-12-02 at 15:21 +0800, pi-cheng.chen wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Wouldn't be better if we can specify size of the accessed memory region, instead of iteration count? We can use phases to create arbitrary number of iterations for memory access.
Thanks, Ivan
Hi Ivan,
I think the value of "mem" key we specify here is taken as the size of the memory to be memcpy() in the patch, not as iteration count. Did I misunderstand what you mean? Could you please be clearer?
Thanks. Pi-Cheng
On Wed, 2014-12-03 at 14:39 +0800, Pi-Cheng Chen wrote:
On 2 December 2014 at 19:02, Ivan T. Ivanov iivanov@mm-sol.com wrote:
On Tue, 2014-12-02 at 15:21 +0800, pi-cheng.chen wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Wouldn't be better if we can specify size of the accessed memory region, instead of iteration count? We can use phases to create arbitrary number of iterations for memory access.
Thanks, Ivan
Hi Ivan,
I think the value of "mem" key we specify here is taken as the size of the memory to be memcpy() in the patch, not as iteration count. Did I misunderstand what you mean?
No. I had missed that count is used to specify the memory chunk size, sorry.
Thank you. Ivan
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we will thrash the cache and how the memory access (up to the DDR) will be the bottleneck. This should at least be configurable in the global section.
}
+}
static int run_event(event_data_t *event, int dry_run, unsigned long *perf, unsigned long *duration, rtapp_resource_t *resources) { @@ -196,6 +237,18 @@ static int run_event(event_data_t *event, int dry_run, pthread_mutex_unlock(&(ddata->res.mtx.obj)); break; }
case rtapp_mem:
{
log_debug("mem %d", event->count);
memload(event->count);
}
break;
case rtapp_iorun:
{
log_debug("iorun %d", event->count);
ioload(event->count);
}
break; } return lock;
@@ -488,6 +541,22 @@ int main(int argc, char* argv[])
parse_command_line(argc, argv, &opts);
/* allocate memory buffers for memory-bound and IO-bound busy loops */
buffer[0] = malloc(MEM_BUFFER_SIZE);
buffer[1] = malloc(MEM_BUFFER_SIZE);
You use the same buffer for all threads
if (!buffer[0] || !buffer[1]) {
log_error("Cannot allocate memory buffers for memory-bound and"
"IO-bound busy loops");
exit(EXIT_FAILURE);
}
/* open file for IO-bound busy loops */
io_fd = open(opts.io_device, O_CREAT | O_WRONLY, 0644);
if (io_fd < 0) {
log_error("Cannot open io_device file %s", opts.io_device);
exit(EXIT_FAILURE);
}
/* allocated threads */ nthreads = opts.nthreads; threads = malloc(nthreads * sizeof(pthread_t));
@@ -713,6 +782,11 @@ int main(int argc, char* argv[]) close(ft_data.trace_fd); close(ft_data.marker_fd); }
close(io_fd);
free(buffer[0]);
free(buffer[1]);
exit(EXIT_SUCCESS);
diff --git a/src/rt-app.h b/src/rt-app.h index d55271f..477d493 100644 --- a/src/rt-app.h +++ b/src/rt-app.h @@ -36,6 +36,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define BUDGET_OVERP 5
+#define MEM_BUFFER_SIZE (4 * 1024 * 1024)
Why 4MB ?
void *thread_body(void *arg);
#endif /* _RT_APP_H_ */ diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index e062f79..f715424 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -320,6 +320,22 @@ parse_thread_event_data(char *name, struct json_object *obj, return; }
if (!strncmp(name, "mem", strlen("mem")) ||
!strncmp(name, "iorun", strlen("iorun"))) {
if (!json_object_is_type(obj, json_type_int))
goto unknown_event;
data->count = json_object_get_int(obj);
if (!strncmp(name, "mem", strlen("mem")))
data->type = rtapp_mem;
else
data->type = rtapp_iorun;
log_info(PIN2 "type %d count %d", data->type, data->count);
return;
}
if (!strncmp(name, "lock", strlen("lock")) || !strncmp(name, "unlock", strlen("unlock"))) {
@@ -493,6 +509,10 @@ obj_is_event(char *name) return rtapp_suspend; if (!strncmp(name, "resume", strlen("resume"))) return rtapp_resume;
if (!strncmp(name, "mem", strlen("mem")))
return rtapp_mem;
if (!strncmp(name, "iorun", strlen("iorun")))
return rtapp_iorun; return 0;
} @@ -677,6 +697,7 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logbasename = strdup("rt-app"); opts->ftrace = 0; opts->pi_enabled = 0;
opts->io_device = strdup("/dev/null"); return; }
@@ -719,6 +740,8 @@ parse_global(struct json_object *global, rtapp_options_t *opts) TRUE, "rt-app"); opts->ftrace = get_bool_value_from(global, "ftrace", TRUE, 0); opts->pi_enabled = get_bool_value_from(global, "pi_enabled", TRUE, 0);
opts->io_device = get_string_value_from(global, "io_device", TRUE,
"/dev/null");
}
diff --git a/src/rt-app_types.h b/src/rt-app_types.h index cefb3a6..3a7c231 100644 --- a/src/rt-app_types.h +++ b/src/rt-app_types.h @@ -65,6 +65,8 @@ typedef enum resource_t rtapp_timer, rtapp_suspend, rtapp_resume,
rtapp_mem,
rtapp_iorun,
} resource_t;
struct _rtapp_mutex { @@ -103,6 +105,7 @@ typedef struct _event_data_t { int res; int dep; int duration;
int count;
} event_data_t;
typedef struct _phase_data_t { @@ -165,6 +168,7 @@ typedef struct _rtapp_options_t {
int ftrace; int die_on_dmiss;
char *io_device;
} rtapp_options_t;
typedef struct _timing_point_t {
1.9.1
On 2 December 2014 at 19:04, Vincent Guittot vincent.guittot@linaro.org wrote:
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
Hi Vincent,
Thanks for reviewing. The unit of mem and iorun here is the size to be copied/written in byte. As an example, "mem" : 2000 will do a 2000 bytes copy.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
I forgot to do such check for iorun. Will do it.
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
Sure. Will do it.
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we will trash the cache and how the memory access (up to the ddr) will be the bottleneck. This should be at least configurable in the global section
I was wondering whether there is a generic way to get the cache size of the CPU, but I could not find one. So I just set it to a size bigger than the cache on my laptop, which is 3MB. Yes, I should make it configurable in the global section.
}
+}
static int run_event(event_data_t *event, int dry_run, unsigned long *perf, unsigned long *duration, rtapp_resource_t *resources) { @@ -196,6 +237,18 @@ static int run_event(event_data_t *event, int dry_run, pthread_mutex_unlock(&(ddata->res.mtx.obj)); break; }
case rtapp_mem:
{
log_debug("mem %d", event->count);
memload(event->count);
}
break;
case rtapp_iorun:
{
log_debug("iorun %d", event->count);
ioload(event->count);
}
break; } return lock;
@@ -488,6 +541,22 @@ int main(int argc, char* argv[])
parse_command_line(argc, argv, &opts);
/* allocate memory buffers for memory-bound and IO-bound busy loops */
buffer[0] = malloc(MEM_BUFFER_SIZE);
buffer[1] = malloc(MEM_BUFFER_SIZE);
You use the same buffer for all threads
I thought since we don't care about the content of the memory buffer, we could use the same buffer for all threads. But it just comes to my mind that the cache effect for each thread might be unpredictable in this case. I should allocate different buffers for different threads in the use case.
BTW, shall I allocate 2 buffers for each thread, one for "mem", and one for "iorun" ?
if (!buffer[0] || !buffer[1]) {
log_error("Cannot allocate memory buffers for memory-bound and"
"IO-bound busy loops");
exit(EXIT_FAILURE);
}
/* open file for IO-bound busy loops */
io_fd = open(opts.io_device, O_CREAT | O_WRONLY, 0644);
if (io_fd < 0) {
log_error("Cannot open io_device file %s", opts.io_device);
exit(EXIT_FAILURE);
}
/* allocated threads */ nthreads = opts.nthreads; threads = malloc(nthreads * sizeof(pthread_t));
@@ -713,6 +782,11 @@ int main(int argc, char* argv[]) close(ft_data.trace_fd); close(ft_data.marker_fd); }
close(io_fd);
free(buffer[0]);
free(buffer[1]);
exit(EXIT_SUCCESS);
diff --git a/src/rt-app.h b/src/rt-app.h index d55271f..477d493 100644 --- a/src/rt-app.h +++ b/src/rt-app.h @@ -36,6 +36,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define BUDGET_OVERP 5
+#define MEM_BUFFER_SIZE (4 * 1024 * 1024)
Why 4MB ?
Please see above.
Thanks. Pi-Cheng
void *thread_body(void *arg);
#endif /* _RT_APP_H_ */ diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index e062f79..f715424 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -320,6 +320,22 @@ parse_thread_event_data(char *name, struct json_object *obj, return; }
if (!strncmp(name, "mem", strlen("mem")) ||
!strncmp(name, "iorun", strlen("iorun"))) {
if (!json_object_is_type(obj, json_type_int))
goto unknown_event;
data->count = json_object_get_int(obj);
if (!strncmp(name, "mem", strlen("mem")))
data->type = rtapp_mem;
else
data->type = rtapp_iorun;
log_info(PIN2 "type %d count %d", data->type, data->count);
return;
}
if (!strncmp(name, "lock", strlen("lock")) || !strncmp(name, "unlock", strlen("unlock"))) {
@@ -493,6 +509,10 @@ obj_is_event(char *name) return rtapp_suspend; if (!strncmp(name, "resume", strlen("resume"))) return rtapp_resume;
if (!strncmp(name, "mem", strlen("mem")))
return rtapp_mem;
if (!strncmp(name, "iorun", strlen("iorun")))
return rtapp_iorun; return 0;
} @@ -677,6 +697,7 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logbasename = strdup("rt-app"); opts->ftrace = 0; opts->pi_enabled = 0;
opts->io_device = strdup("/dev/null"); return; }
@@ -719,6 +740,8 @@ parse_global(struct json_object *global, rtapp_options_t *opts) TRUE, "rt-app"); opts->ftrace = get_bool_value_from(global, "ftrace", TRUE, 0); opts->pi_enabled = get_bool_value_from(global, "pi_enabled", TRUE, 0);
opts->io_device = get_string_value_from(global, "io_device", TRUE,
"/dev/null");
}
diff --git a/src/rt-app_types.h b/src/rt-app_types.h index cefb3a6..3a7c231 100644 --- a/src/rt-app_types.h +++ b/src/rt-app_types.h @@ -65,6 +65,8 @@ typedef enum resource_t rtapp_timer, rtapp_suspend, rtapp_resume,
rtapp_mem,
rtapp_iorun,
} resource_t;
struct _rtapp_mutex { @@ -103,6 +105,7 @@ typedef struct _event_data_t { int res; int dep; int duration;
int count;
} event_data_t;
typedef struct _phase_data_t { @@ -165,6 +168,7 @@ typedef struct _rtapp_options_t {
int ftrace; int die_on_dmiss;
char *io_device;
} rtapp_options_t;
typedef struct _timing_point_t {
1.9.1
On Wed, 2014-12-03 at 15:08 +0800, Pi-Cheng Chen wrote:
On 2 December 2014 at 19:04, Vincent Guittot vincent.guittot@linaro.org wrote:
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
Hi Vincent,
Thanks for reviewing. The unit of mem and iorun here is the size to be copied/written in byte. As an example, "mem" : 2000 will do a 2000 bytes copy.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
I forgot to do such check for iorun. Will do it.
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
Sure. Will do it.
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we will trash the cache and how the memory access (up to the ddr) will be the bottleneck. This should be at least configurable in the global section
I was thinking is there a generic way the get the cache size of the CPU, but I failed to do so. So I just set it as a size bigger than the size of cache on my laptop, which is 3MB. Yes I should make it configurable in the global section.
OK, so the buffer size could be specified in the JSON file. But why should there be a limitation like MEM_BUFFER_SIZE? Could we just leave the buffer size decision to the user?
There is no guarantee that CPU will touch the buffers with memcpy. It depends how memcpy is implemented. Could we use straight buf1[x] = buf2[x] or something more fancy?
Thanks, Ivan
On 3 December 2014 at 17:24, Ivan T. Ivanov iivanov@mm-sol.com wrote:
On Wed, 2014-12-03 at 15:08 +0800, Pi-Cheng Chen wrote:
On 2 December 2014 at 19:04, Vincent Guittot vincent.guittot@linaro.org wrote:
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
Hi Vincent,
Thanks for reviewing. The unit of mem and iorun here is the size to be copied/written in byte. As an example, "mem" : 2000 will do a 2000 bytes copy.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
I forgot to do such check for iorun. Will do it.
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
Sure. Will do it.
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we will trash the cache and how the memory access (up to the ddr) will be the bottleneck. This should be at least configurable in the global section
I was thinking is there a generic way the get the cache size of the CPU, but I failed to do so. So I just set it as a size bigger than the size of cache on my laptop, which is 3MB. Yes I should make it configurable in the global section.
Ok, so buffer size could be specified in json file. But why there should be limitation like MEM_BUFFER_SIZE, could we just leave buffer size decision to user?
Hi Ivan,
MEM_BUFFER_SIZE should be replaced with the buffer size specified in the "global" section of the JSON file in the next version. The reason we have this limitation here is to prevent a buffer overflow. IOW, the size of the memory to be written in an event (specified with the "mem" key) might be larger than the memory buffer allocated to create the load (which will be specified in the "global" section).
There is no guarantee that CPU will touch the buffers with memcpy. It depends how memcpy is implemented. Could we use straight buf1[x] = buf2[x] or something more fancy?
What do you think about the memset suggested by Vincent?
Thanks Pi-Cheng
Thanks, Ivan
On Wed, 2014-12-03 at 17:52 +0800, Pi-Cheng Chen wrote:
On 3 December 2014 at 17:24, Ivan T. Ivanov iivanov@mm-sol.com wrote:
On Wed, 2014-12-03 at 15:08 +0800, Pi-Cheng Chen wrote:
On 2 December 2014 at 19:04, Vincent Guittot vincent.guittot@linaro.org wrote:
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add 2 new kind of event for running a memory or a io bounded load. "mem" name for a load is memory bounded, and "iorun" name for a load is io bounded. The default file to be written to create the load is /dev/null and the device/file could be specified with "io_device" key in "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
Hi Vincent,
Thanks for reviewing. The unit of mem and iorun here is the size to be copied/written in byte. As an example, "mem" : 2000 will do a 2000 bytes copy.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
I forgot to do such check for iorun. Will do it.
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
Sure. Will do it.
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we will trash the cache and how the memory access (up to the ddr) will be the bottleneck. This should be at least configurable in the global section
I was thinking is there a generic way the get the cache size of the CPU, but I failed to do so. So I just set it as a size bigger than the size of cache on my laptop, which is 3MB. Yes I should make it configurable in the global section.
Ok, so buffer size could be specified in json file. But why there should be limitation like MEM_BUFFER_SIZE, could we just leave buffer size decision to user?
Hi Ivan,
MEM_BUFFER_SIZE should be replaced with the buffer size specified in "global" section in JSON file in next version. The reason why we have this limitation here is to prevent buffer overflow from happening. IOW, the size of memory to be write in a event (specified with "mem" key) might be larger than the memory buffer allocated to create the load (will be specified in "global" section).
Hm, I would like to be able to define the memory access size per mem object. Something like the attached patch.
There is no guarantee that CPU will touch the buffers with memcpy. It depends how memcpy is implemented. Could we use straight buf1[x] = buf2[x] or something more fancy?
How do you think about memset suggested by Vincent?
Yep.
Regards, Ivan