Back to home page

LXR

 
 

    


File indexing completed on 2025-05-11 08:24:43

0001 /*
0002  * Copyright (C) 2017, 2024 embedded brains GmbH & Co. KG
0003  *
0004  * The license and distribution terms for this file may be
0005  * found in the file LICENSE in this distribution or at
0006  * http://www.rtems.com/license/LICENSE.
0007  */
0008 
0009 /*
0010  * This OpenMP micro benchmark is based on mailing list posts from Jakub
0011  * Jelinek.
0012  *
0013  * Subject: [gomp3] libgomp performance improvements
0014  * https://gcc.gnu.org/ml/gcc-patches/2008-03/msg00930.html
0015  *
0016  * Subject: [gomp3] Use private futexes if possible
0017  * https://gcc.gnu.org/ml/gcc-patches/2008-03/msg01126.html
0018  *
0019  * This file can be compiled on Linux, etc. using:
0020  *
0021  * cc -std=c11 -O2 -fopenmp init.c
0022  */
0023 
0024 #ifdef HAVE_CONFIG_H
0025 #include "config.h"
0026 #endif
0027 
#include <limits.h>
#include <omp.h>
#include <stdio.h>
0030 
0031 #ifdef __rtems__
0032 #include <pthread.h>
0033 #include <tmacros.h>
0034 
/*
 * Upper bound on the number of processors: it sizes the per-processor
 * bookkeeping array of the test context and is handed to the application
 * configuration as CONFIGURE_MAXIMUM_PROCESSORS.
 */
#define CPU_COUNT_MAX 32

/* Name of this test (presumably picked up by the RTEMS test macros). */
const char rtems_test_name[] = "SMPOPENMP 1";
0038 #endif /* __rtems__ */
0039 
/*
 * Placeholder workload of the benchmarks.
 *
 * The empty assembler statement with a "memory" clobber emits no
 * instructions; it only acts as a compiler barrier so that the calls are not
 * optimized away or merged.
 */
static void work(void)
{
  __asm__ __volatile__ ("" : : : "memory");
}
0044 
/*
 * Barrier benchmark.
 *
 * One parallel region is opened and every thread of the team runs the
 * complete loop (this is not a work-sharing loop).  Each round consists of
 * eleven work() calls separated by ten explicit barriers.  The sequence is
 * intentionally written out by hand, since it is the code being measured.
 */
static void barrier_bench(void)
{
  enum { ROUNDS = 10000 };

  #pragma omp parallel
  {
    for (int round = 0; round < ROUNDS; round++) {
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
      #pragma omp barrier
      work();
    }
  }
}
0072 
/*
 * Parallel region benchmark: opens and closes a parallel region 20000 times,
 * each region executing a single work() call per thread.
 */
static void parallel_bench(void)
{
  enum { REGIONS = 20000 };

  for (int region = 0; region < REGIONS; region++) {
    #pragma omp parallel
    {
      work();
    }
  }
}
0080 
/*
 * Static schedule benchmark: runs 1000 work-sharing loops of 100 iterations
 * each, distributed with schedule(static).
 */
static void static_bench(void)
{
  enum { OUTER_ROUNDS = 1000, INNER_ITERATIONS = 100 };

  for (int outer = 0; outer < OUTER_ROUNDS; outer++) {
    #pragma omp parallel for schedule (static)
    for (int inner = 0; inner < INNER_ITERATIONS; inner++) {
      work();
    }
  }
}
0090 
/*
 * Dynamic schedule benchmark: one work-sharing loop of 100000 iterations
 * distributed with schedule(dynamic).
 */
static void dynamic_bench(void)
{
  enum { ITERATIONS = 100000 };

  #pragma omp parallel for schedule (dynamic)
  for (int iteration = 0; iteration < ITERATIONS; iteration++) {
    work();
  }
}
0098 
/*
 * Guided schedule benchmark: one work-sharing loop of 100000 iterations
 * distributed with schedule(guided).
 */
static void guided_bench(void)
{
  enum { ITERATIONS = 100000 };

  #pragma omp parallel for schedule (guided)
  for (int iteration = 0; iteration < ITERATIONS; iteration++) {
    work();
  }
}
0106 
/*
 * Runtime schedule benchmark: one work-sharing loop of 100000 iterations
 * whose schedule is selected at run time via schedule(runtime).
 */
static void runtime_bench(void)
{
  enum { ITERATIONS = 100000 };

  #pragma omp parallel for schedule (runtime)
  for (int iteration = 0; iteration < ITERATIONS; iteration++) {
    work();
  }
}
0114 
/*
 * Single construct benchmark: inside one parallel region every thread runs
 * the complete loop of 10000 rounds; in each round the work() call is wrapped
 * in a single construct.
 */
static void single_bench(void)
{
  enum { ROUNDS = 10000 };

  #pragma omp parallel
  {
    for (int round = 0; round < ROUNDS; round++) {
      #pragma omp single
      {
        work();
      }
    }
  }
}
0123 
0124 static void all(void)
0125 {
0126   barrier_bench();
0127   parallel_bench();
0128   static_bench();
0129   dynamic_bench();
0130   guided_bench();
0131   runtime_bench();
0132   single_bench();
0133 }
0134 
/*
 * Measures one benchmark and prints the result as a JSON object member.
 *
 * name:  prefix of the emitted member name ("<name>-bench").
 * bench: benchmark function to measure.
 * n:     number of timed invocations; nothing is timed if n <= 0.
 *
 * The benchmark is invoked once before the measurement starts; this extra
 * invocation is not included in the reported time.  The printed value is the
 * omp_get_wtime() difference across all n timed invocations.  The output
 * starts with ",\n" because the caller has already printed a preceding
 * member.
 */
static void do_bench(const char *name, void (*bench)(void), int n)
{
  double t0;
  double elapsed;
  int remaining;

  /* Untimed warm-up run */
  bench();

  t0 = omp_get_wtime();

  for (remaining = n; remaining > 0; --remaining) {
    bench();
  }

  elapsed = omp_get_wtime() - t0;
  printf(",\n    \"%s-bench\": %f", name, elapsed);
}
0148 
0149 static const char *test_sep = "";
0150 
0151 static void microbench(int num_threads, int n)
0152 {
0153   omp_set_num_threads(num_threads);
0154   printf(
0155     "%s{\n"
0156     "    \"num-threads\": %i,\n"
0157     "    \"major-loop-count\": %i",
0158     test_sep,
0159     num_threads,
0160     n
0161   );
0162   test_sep = ", ";
0163   do_bench("barrier", barrier_bench, n);
0164   do_bench("parallel", parallel_bench, n);
0165   do_bench("static", static_bench, n);
0166   do_bench("dynamic", dynamic_bench, n);
0167   do_bench("guided", guided_bench, n);
0168   do_bench("runtime", runtime_bench, n);
0169   do_bench("single", single_bench, n);
0170   printf("\n  }");
0171 }
0172 
/*
 * Estimates how many runs of all() take roughly three seconds with a single
 * thread.
 *
 * all() is run once untimed and once timed with the thread count set to one.
 * If the timed run took a positive time of at most one second, the result is
 * 3.0 divided by that time, truncated towards zero.  Otherwise (no measurable
 * time, more than one second, or a NaN) the result is 1.
 *
 * The quotient is clamped to INT_MAX before the conversion, since converting
 * a floating-point value that does not fit into an int is undefined
 * behaviour.  This can only matter for an extremely small positive time
 * difference, e.g. due to a clock with unexpected resolution.
 *
 * Returns the major loop count, always >= 1.
 */
static int estimate_3s_runtime_with_one_proc(void)
{
  double start;
  double delta;
  double count;

  omp_set_num_threads(1);

  /* Untimed warm-up run */
  all();

  start = omp_get_wtime();
  all();
  delta = omp_get_wtime() - start;

  /* Written as a negation so that a NaN also ends up here */
  if (!(delta > 0.0 && delta <= 1.0)) {
    return 1;
  }

  count = 3.0 / delta;

  if (count >= (double) INT_MAX) {
    return INT_MAX;
  }

  return (int) count;
}
0193 
/*
 * Benchmark driver.
 *
 * Prints a JSON array framed by begin/end marker lines.  The array contains
 * one object per thread count from 1 up to the number of processors reported
 * by omp_get_num_procs().  The major loop count is estimated once up front
 * with a single thread and used for all thread counts.
 */
static void test(void)
{
  int loop_count;
  int cpu_total;
  int threads;

  printf("*** BEGIN OF JSON DATA ***\n[\n  ");

  loop_count = estimate_3s_runtime_with_one_proc();
  cpu_total = omp_get_num_procs();
  omp_set_num_threads(cpu_total);

  threads = 1;

  while (threads <= cpu_total) {
    microbench(threads, loop_count);
    ++threads;
  }

  printf("\n]\n*** END OF JSON DATA ***\n");
}
0211 
0212 #ifdef __rtems__
0213 
0214 static void Init(rtems_task_argument arg)
0215 {
0216   rtems_status_code sc;
0217   cpu_set_t cpu_set;
0218 
0219   rtems_print_printer_fprintf_putc(&rtems_test_printer);
0220   TEST_BEGIN();
0221 
0222   CPU_ZERO(&cpu_set);
0223   CPU_SET(0, &cpu_set);
0224 
0225   sc = rtems_task_set_affinity(RTEMS_SELF, sizeof(cpu_set), &cpu_set);
0226   rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0227 
0228   test();
0229   TEST_END();
0230   rtems_test_exit(0);
0231 }
0232 
0233 typedef struct {
0234   pthread_mutex_t mtx;
0235   pthread_cond_t cnd;
0236   bool cpus_used[CPU_COUNT_MAX];
0237 } test_context;
0238 
0239 static test_context test_instance;
0240 
0241 static uint32_t find_free_cpu(test_context *ctx)
0242 {
0243   uint32_t i;
0244   uint32_t n;
0245 
0246   n = rtems_scheduler_get_processor_maximum();
0247 
0248   pthread_mutex_lock(&ctx->mtx);
0249 
0250   do {
0251     for (i = 1; i < n; ++i) {
0252       if (!ctx->cpus_used[i]) {
0253         ctx->cpus_used[i] = true;
0254         break;
0255       }
0256     }
0257 
0258     if (i == n) {
0259       pthread_cond_wait(&ctx->cnd, &ctx->mtx);
0260     }
0261   } while (i == n);
0262 
0263   pthread_mutex_unlock(&ctx->mtx);
0264 
0265   return i;
0266 }
0267 
0268 static void begin_extension(Thread_Control *th)
0269 {
0270   rtems_id th_id;
0271 
0272   th_id = th->Object.id;
0273 
0274   if (rtems_object_id_get_api(th_id) == OBJECTS_POSIX_API) {
0275     rtems_status_code sc;
0276     rtems_id sched_id;
0277     uint32_t cpu_index;
0278     cpu_set_t cpu_set;
0279     rtems_task_priority prio;
0280 
0281     cpu_index = find_free_cpu(&test_instance);
0282 
0283     sc = rtems_scheduler_ident_by_processor(cpu_index, &sched_id);
0284     rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0285 
0286     sc = rtems_task_set_priority(th_id, RTEMS_CURRENT_PRIORITY, &prio);
0287     rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0288 
0289     sc = rtems_task_set_scheduler(th_id, sched_id, prio);
0290     rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0291 
0292     CPU_ZERO(&cpu_set);
0293     CPU_SET((int) cpu_index, &cpu_set);
0294 
0295     sc = rtems_task_set_affinity(th_id, sizeof(cpu_set), &cpu_set);
0296     rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0297   }
0298 }
0299 
0300 static void terminate_extension(Thread_Control *th)
0301 {
0302   rtems_id th_id;
0303 
0304   th_id = th->Object.id;
0305 
0306   if (rtems_object_id_get_api(th_id) == OBJECTS_POSIX_API) {
0307     rtems_status_code sc;
0308     cpu_set_t cpu_set;
0309     uint32_t cpu_index;
0310     test_context *ctx;
0311 
0312     sc = rtems_task_get_affinity(th_id, sizeof(cpu_set), &cpu_set);
0313     rtems_test_assert(sc == RTEMS_SUCCESSFUL);
0314 
0315     cpu_index = CPU_FFS(&cpu_set) - 1;
0316 
0317     ctx = &test_instance;
0318 
0319     pthread_mutex_lock(&ctx->mtx);
0320     rtems_test_assert(ctx->cpus_used[cpu_index]);
0321     ctx->cpus_used[cpu_index] = false;
0322     pthread_cond_broadcast(&ctx->cnd);
0323     pthread_mutex_unlock(&ctx->mtx);
0324   }
0325 }
0326 
0327 #define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
0328 #define CONFIGURE_APPLICATION_NEEDS_SIMPLE_CONSOLE_DRIVER
0329 
0330 #define CONFIGURE_MAXIMUM_PROCESSORS CPU_COUNT_MAX
0331 
0332 #define CONFIGURE_UNLIMITED_OBJECTS
0333 #define CONFIGURE_UNIFIED_WORK_AREAS
0334 
0335 #define CONFIGURE_INITIAL_EXTENSIONS \
0336   { \
0337     .thread_begin = begin_extension, \
0338     .thread_terminate = terminate_extension \
0339   }, \
0340   RTEMS_TEST_INITIAL_EXTENSION
0341 
0342 #define CONFIGURE_INIT_TASK_ATTRIBUTES RTEMS_FLOATING_POINT
0343 
0344 #define CONFIGURE_RTEMS_INIT_TASKS_TABLE
0345 
0346 #define CONFIGURE_INIT
0347 
0348 #include <rtems/confdefs.h>
0349 
0350 #else /* __rtems__ */
0351 
/*
 * Entry point for builds outside of RTEMS (e.g. on Linux): runs the
 * benchmark and reports success.
 */
int main(void)
{
  test();

  return 0;
}
0357 
0358 #endif /* __rtems__ */