On 2 December 2014 at 19:04, Vincent Guittot vincent.guittot@linaro.org wrote:
On 2 December 2014 at 08:21, pi-cheng.chen pi-cheng.chen@linaro.org wrote:
Add two new kinds of events for running a memory-bound or an IO-bound load: "mem" names a memory-bound load, and "iorun" names an IO-bound load. The default file written to create the IO load is /dev/null; a different device/file can be specified with the "io_device" key in the "global" section.
Hi pi-cheng
What's the unit of the mem and iorun ? As an example, "mem" : 2000 will do a copy of 2000 Bytes ? KBytes ?
Hi Vincent,
Thanks for reviewing. The unit of mem and iorun here is the size to be copied/written, in bytes. As an example, "mem" : 2000 will do a 2000-byte copy.
E.g. "tasks" : { "thread0" : { "sleep" : 1000, "run" : 100, "mem" : 1000, "sleep" : 10000, "iorun" : 1000 } }, "global" : { "io_device" : "/dev/ttyS0" }
Signed-off-by: pi-cheng.chen pi-cheng.chen@linaro.org
src/rt-app.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ src/rt-app.h | 2 ++ src/rt-app_parse_config.c | 23 +++++++++++++++ src/rt-app_types.h | 4 +++ 4 files changed, 103 insertions(+)
diff --git a/src/rt-app.c b/src/rt-app.c index 3cd601d..13f72e4 100644 --- a/src/rt-app.c +++ b/src/rt-app.c @@ -33,6 +33,8 @@ static volatile int continue_running; static pthread_t *threads; static int nthreads; static int p_load; +static char *buffer[2]; +static int io_fd; rtapp_options_t opts;
static ftrace_data_t ft_data = { @@ -110,6 +112,45 @@ static inline loadwait(unsigned long exec) return load_count; }
+static void ioload(unsigned long count) +{
ssize_t ret;
char *buf = buffer[0];
while (count != 0) {
ret = write(io_fd, buffer, count);
count can be higher than buffer size
so you have defined a MEM_BUFFER_SIZE for mem transfer but not for iorun ?
I forgot to do such check for iorun. Will do it.
if (ret == -1) {
perror("write");
return;
}
count -= ret;
buf += ret;
}
+}
+static void memload(unsigned long count) +{
static unsigned long current = 0;
while (count > 0) {
unsigned long size;
if (count > MEM_BUFFER_SIZE)
size = MEM_BUFFER_SIZE;
else
size = count;
if (size > (MEM_BUFFER_SIZE - current))
size = MEM_BUFFER_SIZE - current;
memcpy(buffer[0], buffer[1], size);
I wonder if a memset would be better
Sure. Will do it.
count -= size;
current += size;
if (current >= MEM_BUFFER_SIZE)
current -= MEM_BUFFER_SIZE;
The size of MEM_BUFFER_SIZE will deeply impact how we thrash the cache and how much the memory access (up to the DDR) will be the bottleneck. This should at least be configurable in the global section.
I was wondering whether there is a generic way to get the cache size of the CPU, but I could not find one. So I just set it to a size bigger than the cache on my laptop, which is 3MB. Yes, I should make it configurable in the global section.
}
+}
static int run_event(event_data_t *event, int dry_run, unsigned long *perf, unsigned long *duration, rtapp_resource_t *resources) { @@ -196,6 +237,18 @@ static int run_event(event_data_t *event, int dry_run, pthread_mutex_unlock(&(ddata->res.mtx.obj)); break; }
case rtapp_mem:
{
log_debug("mem %d", event->count);
memload(event->count);
}
break;
case rtapp_iorun:
{
log_debug("iorun %d", event->count);
ioload(event->count);
}
break; } return lock;
@@ -488,6 +541,22 @@ int main(int argc, char* argv[])
parse_command_line(argc, argv, &opts);
/* allocate memory buffers for memory-bound and IO-bound busy loops */
buffer[0] = malloc(MEM_BUFFER_SIZE);
buffer[1] = malloc(MEM_BUFFER_SIZE);
You use the same buffer for all threads
I thought that since we don't care about the content of the memory buffer, we could use the same buffer for all threads. But it has just occurred to me that the cache effect for each thread might be unpredictable in this case. I should allocate different buffers for different threads in this use case.
BTW, shall I allocate 2 buffers for each thread, one for "mem", and one for "iorun" ?
if (!buffer[0] || !buffer[1]) {
log_error("Cannot allocate memory buffers for memory-bound and"
"IO-bound busy loops");
exit(EXIT_FAILURE);
}
/* open file for IO-bound busy loops */
io_fd = open(opts.io_device, O_CREAT | O_WRONLY, 0644);
if (io_fd < 0) {
log_error("Cannot open io_device file %s", opts.io_device);
exit(EXIT_FAILURE);
}
/* allocated threads */ nthreads = opts.nthreads; threads = malloc(nthreads * sizeof(pthread_t));
@@ -713,6 +782,11 @@ int main(int argc, char* argv[]) close(ft_data.trace_fd); close(ft_data.marker_fd); }
close(io_fd);
free(buffer[0]);
free(buffer[1]);
exit(EXIT_SUCCESS);
diff --git a/src/rt-app.h b/src/rt-app.h index d55271f..477d493 100644 --- a/src/rt-app.h +++ b/src/rt-app.h @@ -36,6 +36,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define BUDGET_OVERP 5
+#define MEM_BUFFER_SIZE (4 * 1024 * 1024)
Why 4MB ?
Please see above.
Thanks. Pi-Cheng
void *thread_body(void *arg);
#endif /* _RT_APP_H_ */ diff --git a/src/rt-app_parse_config.c b/src/rt-app_parse_config.c index e062f79..f715424 100644 --- a/src/rt-app_parse_config.c +++ b/src/rt-app_parse_config.c @@ -320,6 +320,22 @@ parse_thread_event_data(char *name, struct json_object *obj, return; }
if (!strncmp(name, "mem", strlen("mem")) ||
!strncmp(name, "iorun", strlen("iorun"))) {
if (!json_object_is_type(obj, json_type_int))
goto unknown_event;
data->count = json_object_get_int(obj);
if (!strncmp(name, "mem", strlen("mem")))
data->type = rtapp_mem;
else
data->type = rtapp_iorun;
log_info(PIN2 "type %d count %d", data->type, data->count);
return;
}
if (!strncmp(name, "lock", strlen("lock")) || !strncmp(name, "unlock", strlen("unlock"))) {
@@ -493,6 +509,10 @@ obj_is_event(char *name) return rtapp_suspend; if (!strncmp(name, "resume", strlen("resume"))) return rtapp_resume;
if (!strncmp(name, "mem", strlen("mem")))
return rtapp_mem;
if (!strncmp(name, "iorun", strlen("iorun")))
return rtapp_iorun; return 0;
} @@ -677,6 +697,7 @@ parse_global(struct json_object *global, rtapp_options_t *opts) opts->logbasename = strdup("rt-app"); opts->ftrace = 0; opts->pi_enabled = 0;
opts->io_device = strdup("/dev/null"); return; }
@@ -719,6 +740,8 @@ parse_global(struct json_object *global, rtapp_options_t *opts) TRUE, "rt-app"); opts->ftrace = get_bool_value_from(global, "ftrace", TRUE, 0); opts->pi_enabled = get_bool_value_from(global, "pi_enabled", TRUE, 0);
opts->io_device = get_string_value_from(global, "io_device", TRUE,
"/dev/null");
}
diff --git a/src/rt-app_types.h b/src/rt-app_types.h index cefb3a6..3a7c231 100644 --- a/src/rt-app_types.h +++ b/src/rt-app_types.h @@ -65,6 +65,8 @@ typedef enum resource_t rtapp_timer, rtapp_suspend, rtapp_resume,
rtapp_mem,
rtapp_iorun,
} resource_t;
struct _rtapp_mutex { @@ -103,6 +105,7 @@ typedef struct _event_data_t { int res; int dep; int duration;
int count;
} event_data_t;
typedef struct _phase_data_t { @@ -165,6 +168,7 @@ typedef struct _rtapp_options_t {
int ftrace; int die_on_dmiss;
char *io_device;
} rtapp_options_t;
typedef struct _timing_point_t {
1.9.1