From 9c739800a8915d5f2a73c840190920e95ffa1c5c Mon Sep 17 00:00:00 2001
From: Reini Urban <rurban@cpan.org>
Date: Fri, 18 Feb 2022 09:46:45 +0100
Subject: [PATCH] fix armv7 asm inline error GH #115

Some armv7 buildroot toolchains fail to compile the inline asm in
these benchmark helpers. We already probe for a working inline-asm
keyword at configure time, so use that probe's result (ASM_INLINE)
and fall back to clock() when it is undefined; a sketch of the
pattern follows the diffstat below.
Fixes GH #115

[Retrieved from:
https://github.com/rurban/safeclib/commit/9c739800a8915d5f2a73c840190920e95ffa1c5c]
Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
---
 tests/perf_private.h | 49 +++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

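The fix keys off ASM_INLINE, which the configure step is assumed to
define to whichever inline-asm keyword the compiler accepts and to
leave undefined when the probe fails, as on the affected armv7
toolchains. A minimal, self-contained sketch of the resulting
pattern; hardcoding ASM_INLINE is an assumption made only so the
sketch compiles on a GCC/Clang x86-64 host:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for the configure-time probe (an assumption, not
 * safeclib's actual detection): enable inline asm only where we
 * know GNU-style asm works. */
#if defined(__GNUC__) && defined(__x86_64__)
#define ASM_INLINE __asm__
#endif

static inline clock_t rdtsc(void) {
#ifndef ASM_INLINE
    return clock(); /* no usable inline asm: degrade to wall-clock ticks */
#else
    uint64_t a, d;
    /* rdtsc leaves the low 32 TSC bits in eax, the high bits in edx */
    ASM_INLINE volatile("rdtsc" : "=a"(a), "=d"(d));
    return (clock_t)(a | (d << 32));
#endif
}

int main(void) {
    clock_t c0 = rdtsc();
    clock_t c1 = rdtsc();
    printf("timer delta: %lld\n", (long long)(c1 - c0));
    return 0;
}
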
diff --git a/tests/perf_private.h b/tests/perf_private.h
index 3296cb3d..843674d3 100644
--- a/tests/perf_private.h
+++ b/tests/perf_private.h
@@ -1,9 +1,9 @@
 /*------------------------------------------------------------------
  * perf_private.h - Internal benchmarking tools
  *
- * 2020 Reini Urban
+ * 2020,2022 Reini Urban
  *
- * Copyright (c) 2017, 2020 Reini Urban
+ * Copyright (c) 2017, 2020, 2022 Reini Urban
  * All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
@@ -55,13 +55,16 @@ static inline uint64_t timer_start();
 static inline uint64_t timer_end();
 
 static inline clock_t rdtsc() {
-#ifdef __x86_64__
+#ifndef ASM_INLINE
+#define NO_CYCLE_COUNTER
+    return clock();
+#elif defined __x86_64__
     uint64_t a, d;
-    __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
+    ASM_INLINE volatile("rdtsc" : "=a"(a), "=d"(d));
     return (clock_t)(a | (d << 32));
 #elif defined(__i386__)
     clock_t x;
-    __asm__ volatile("rdtsc" : "=A"(x));
+    ASM_INLINE volatile("rdtsc" : "=A"(x));
     return x;
 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && (SIZEOF_SIZE_T == 4)
     // V7 is the earliest arch that has a standard cyclecount (some say 6)
@@ -69,11 +72,11 @@ static inline clock_t rdtsc() {
     uint32_t pmuseren;
     uint32_t pmcntenset;
     // Read the user mode perf monitor counter access permissions.
-    asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
+    ASM_INLINE volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
     if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
-        asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
+        ASM_INLINE volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
         if (pmcntenset & 0x80000000ul) { // Is it counting?
-            asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
+            ASM_INLINE volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
             // The counter is set up to count every 64th cycle
             return (int64_t)(pmccntr) * 64; // Should optimize to << 6
         }
@@ -83,22 +86,22 @@ static inline clock_t rdtsc() {
     uint64_t pmccntr;
     uint64_t pmuseren = 1UL;
     // Read the user mode perf monitor counter access permissions.
-    //asm volatile("mrs cntv_ctl_el0, %0" : "=r" (pmuseren));
+    //ASM_INLINE volatile("mrs cntv_ctl_el0, %0" : "=r" (pmuseren));
     if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
-        asm volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
+        ASM_INLINE volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
         return (uint64_t)(pmccntr) * 64; // Should optimize to << 6
     }
     return (uint64_t)rdtsc();
 #elif defined(__powerpc64__) || defined(__ppc64__)
     uint64_t tb;
-    __asm__ volatile (\
+    ASM_INLINE volatile (\
         "mfspr %0, 268"
         : "=r" (tb));
     return tb;
 #elif defined(__powerpc__) || defined(__ppc__)
     // This returns a time-base, which is not always precisely a cycle-count.
     uint32_t tbu, tbl, tmp;
-    __asm__ volatile (\
+    ASM_INLINE volatile (\
         "0:\n"
         "mftbu %0\n"
         "mftbl %1\n"
@@ -109,12 +112,12 @@ static inline clock_t rdtsc() {
     return (((uint64_t) tbu << 32) | tbl);
 #elif defined(__sparc__)
     uint64_t tick;
-    asm(".byte 0x83, 0x41, 0x00, 0x00");
-    asm("mov %%g1, %0" : "=r" (tick));
+    ASM_INLINE(".byte 0x83, 0x41, 0x00, 0x00");
+    ASM_INLINE("mov %%g1, %0" : "=r" (tick));
     return tick;
 #elif defined(__ia64__)
     uint64_t itc;
-    asm("mov %0 = ar.itc" : "=r" (itc));
+    ASM_INLINE("mov %0 = ar.itc" : "=r" (itc));
     return itc;
 #else
 #define NO_CYCLE_COUNTER
@@ -126,9 +129,11 @@ static inline clock_t rdtsc() {
 // 3.2.1 The Improved Benchmarking Method
 static inline uint64_t timer_start()
 {
-#if defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
+#ifndef ASM_INLINE
+    return (uint64_t)rdtsc();
+#elif defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
     uint32_t cycles_high, cycles_low;
-    __asm__ volatile
+    ASM_INLINE volatile
         ("cpuid\n\t"
          "rdtsc\n\t"
          "mov %%edx, %0\n\t"
@@ -137,7 +142,7 @@ static inline uint64_t timer_start()
     return ((uint64_t)cycles_high << 32) | cycles_low;
 #elif defined __x86_64__
     uint32_t cycles_high, cycles_low;
-    __asm__ volatile
+    ASM_INLINE volatile
         ("cpuid\n\t"
          "rdtsc\n\t"
          "mov %%edx, %0\n\t"
@@ -151,9 +156,11 @@ static inline uint64_t timer_start()
 
 static inline uint64_t timer_end()
 {
-#if defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
+#ifndef ASM_INLINE
+    return (uint64_t)rdtsc();
+#elif defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
     uint32_t cycles_high, cycles_low;
-    __asm__ volatile
+    ASM_INLINE volatile
         ("rdtscp\n\t"
          "mov %%edx, %0\n\t"
          "mov %%eax, %1\n\t"
@@ -162,7 +169,7 @@ static inline uint64_t timer_end()
     return ((uint64_t)cycles_high << 32) | cycles_low;
 #elif defined __x86_64__
     uint32_t cycles_high, cycles_low;
-    __asm__ volatile
+    ASM_INLINE volatile
         ("rdtscp\n\t"
          "mov %%edx, %0\n\t"
          "mov %%eax, %1\n\t"
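
For reference, timer_start()/timer_end() implement the "Improved
Benchmarking Method" the comment in the header cites: a serializing
cpuid before rdtsc so earlier instructions cannot drift into the
timed region, and rdtscp followed by cpuid afterwards so the timed
instructions retire before later ones start. A runnable x86-64 sketch
of the patched behavior follows; as above, defining ASM_INLINE by
hand is an assumption standing in for the configure probe, and where
the patch falls back through rdtsc() the sketch calls clock()
directly, which is what rdtsc() degrades to:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#if defined(__GNUC__) && defined(__x86_64__)
#define ASM_INLINE __asm__ /* assumption: stands in for the probe */
#endif

static inline uint64_t timer_start(void) {
#ifndef ASM_INLINE
    return (uint64_t)clock(); /* fallback when inline asm is unusable */
#else
    uint32_t hi, lo;
    ASM_INLINE volatile("cpuid\n\t"          /* serialize the pipeline */
                        "rdtsc\n\t"
                        "mov %%edx, %0\n\t"
                        "mov %%eax, %1\n\t"
                        : "=r"(hi), "=r"(lo)
                        :: "%rax", "%rbx", "%rcx", "%rdx");
    return ((uint64_t)hi << 32) | lo;
#endif
}

static inline uint64_t timer_end(void) {
#ifndef ASM_INLINE
    return (uint64_t)clock();
#else
    uint32_t hi, lo;
    ASM_INLINE volatile("rdtscp\n\t"         /* reads TSC after prior insns finish */
                        "mov %%edx, %0\n\t"
                        "mov %%eax, %1\n\t"
                        "cpuid\n\t"          /* fence against later insns */
                        : "=r"(hi), "=r"(lo)
                        :: "%rax", "%rbx", "%rcx", "%rdx");
    return ((uint64_t)hi << 32) | lo;
#endif
}

int main(void) {
    uint64_t t0 = timer_start();
    volatile uint64_t sink = 0;              /* trivial measured workload */
    for (int i = 0; i < 1000; i++)
        sink += (uint64_t)i;
    uint64_t t1 = timer_end();
    printf("timer delta: %llu\n", (unsigned long long)(t1 - t0));
    return 0;
}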