mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
The binary GCD algorithm is based on the following facts:
1. If a and b are both even, then gcd(a,b) = 2 * gcd(a/2, b/2)
2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
3. If a and b are both odd, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% of the execution time) than the
division-based Euclidean algorithm.
On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.
There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available. This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.
If fast __ffs is not available, the "even/odd" GCD variant is used.
I use the following code to benchmark:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*
 * swap() - exchange the values of two lvalues of the same type.
 *
 * The exchange goes through a temporary instead of the XOR trick: the XOR
 * form leaves zero behind when both arguments designate the same object.
 * Each argument is expanded more than once, so do not pass expressions
 * with side effects.
 */
#define swap(a, b) \
do { \
	__typeof__(a) swap_tmp_ = (a); \
	(a) = (b); \
	(b) = swap_tmp_; \
} while (0)
/*
 * gcd0 - reference implementation: division-based Euclidean algorithm.
 *
 * Returns the greatest common divisor of a and b.  If one operand is zero
 * the other one is returned.
 */
unsigned long gcd0(unsigned long a, unsigned long b)
{
	unsigned long rem;

	/* Make sure a holds the larger operand before dividing. */
	if (a < b)
		swap(a, b);

	if (!b)
		return a;

	for (rem = a % b; rem; rem = a % b) {
		a = b;
		b = rem;
	}

	return b;
}
/*
 * gcd1 - binary GCD built on count-trailing-zeros (__builtin_ctzl).
 *
 * Both operands are stripped of their trailing zero bits and then
 * repeatedly subtracted from each other until they are equal.  The power
 * of two common to a and b is shifted back in when the result is returned.
 */
unsigned long gcd1(unsigned long a, unsigned long b)
{
	unsigned long both = a | b;

	/* With a zero operand, a | b is simply the other operand. */
	if (a == 0 || b == 0)
		return both;

	b >>= __builtin_ctzl(b);
	while (1) {
		a >>= __builtin_ctzl(a);
		if (a == b)
			break;
		if (a < b)
			swap(a, b);
		a -= b;
	}

	/* ctz(a | b) counts the trailing zero bits shared by both inputs. */
	return a << __builtin_ctzl(both);
}
/*
 * gcd2 - binary GCD that uses only single-bit shifts (no __builtin_ctzl).
 *
 * r is reduced to the lowest set bit of (a | b).  Rather than shifting that
 * factor out and back in, "odd" is tested against the r bit instead of
 * bit 0, so the value returned already contains the common power of two.
 */
unsigned long gcd2(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
/* If either operand is zero, a | b is simply the other operand. */
if (!a || !b)
return r;
/* Isolate the lowest set bit of a | b. */
r &= -r;
/* Shift b right until its r bit is set. */
while (!(b & r))
b >>= 1;
for (;;) {
/* Shift a right until its r bit is set. */
while (!(a & r))
a >>= 1;
if (a == b)
return a;
/* Keep the larger value in a so that the subtraction cannot wrap. */
if (a < b)
swap(a, b);
a -= b;
a >>= 1;
/* If the r bit is set after halving, add b back before halving again. */
if (a & r)
a += b;
a >>= 1;
}
}
/*
 * gcd3 - gcd1 with an early exit as soon as a or b has been reduced to 1.
 *
 * In that case the result is the lowest set bit of (a | b), i.e. the
 * power of two common to both inputs.
 */
unsigned long gcd3(unsigned long a, unsigned long b)
{
	const unsigned long both = a | b;

	/* With a zero operand, a | b is simply the other operand. */
	if (a == 0 || b == 0)
		return both;

	b >>= __builtin_ctzl(b);
	if (b == 1)
		return both & -both;

	do {
		a >>= __builtin_ctzl(a);
		if (a == 1)
			return both & -both;
		if (a == b)
			return a << __builtin_ctzl(both);
		if (a < b)
			swap(a, b);
		a -= b;
	} while (1);
}
/*
 * gcd4 - gcd2 with early exits once a or b has been reduced to r.
 *
 * r is the lowest set bit of (a | b); as soon as either operand equals r
 * the function returns r without iterating further.
 */
unsigned long gcd4(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
/* If either operand is zero, a | b is simply the other operand. */
if (!a || !b)
return r;
/* Isolate the lowest set bit of a | b. */
r &= -r;
/* Shift b right until its r bit is set. */
while (!(b & r))
b >>= 1;
/* b has been reduced to r itself: return r right away. */
if (b == r)
return r;
for (;;) {
/* Shift a right until its r bit is set. */
while (!(a & r))
a >>= 1;
/* Same early exit for a. */
if (a == r)
return r;
if (a == b)
return a;
/* Keep the larger value in a so that the subtraction cannot wrap. */
if (a < b)
swap(a, b);
a -= b;
a >>= 1;
/* If the r bit is set after halving, add b back before halving again. */
if (a & r)
a += b;
a >>= 1;
}
}
/* Implementations under test, in the order they are benchmarked and reported. */
static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
	gcd0,
	gcd1,
	gcd2,
	gcd3,
	gcd4,
};

/* Number of entries in gcd_func[]. */
#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(*gcd_func))
#if defined(__x86_64__)
/*
 * rdtscll() - execute the rdtsc instruction and store the result in val.
 *
 * The "=a" output supplies the low 32 bits and the "=d" output the high
 * 32 bits; both halves are combined into one unsigned long long.
 */
#define rdtscll(val) do { \
unsigned long __a,__d; \
__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
} while(0)
/*
 * benchmark_gcd_func - time one call of a GCD implementation (x86_64).
 * @gcd: implementation to call
 * @a:   first operand passed to @gcd
 * @b:   second operand passed to @gcd
 * @res: receives the value returned by @gcd
 *
 * Returns the difference between the two rdtscll() readings taken
 * immediately before and after the call.
 */
static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
					     unsigned long a, unsigned long b, unsigned long *res)
{
	unsigned long long start, end;
	unsigned long gcd_res;

	rdtscll(start);
	gcd_res = gcd(a, b);
	rdtscll(end);

	/* Store the result only after the second reading, outside the timed region. */
	*res = gcd_res;

	/*
	 * Unsigned subtraction is carried out modulo 2^64, so the difference
	 * is already correct if the counter wrapped between the two readings;
	 * no separate "end < start" case is needed.
	 */
	return end - start;
}
#else
/* Return the current reading of the CLOCK_PROCESS_CPUTIME_ID clock. */
static inline struct timespec read_time(void)
{
	struct timespec now;

	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);

	return now;
}
/*
 * diff_time - nanoseconds elapsed from start to end.
 *
 * A negative nanosecond difference is compensated by borrowing one second
 * from the seconds difference.
 */
static inline unsigned long long diff_time(struct timespec start, struct timespec end)
{
	struct timespec delta;

	delta.tv_sec = end.tv_sec - start.tv_sec;
	delta.tv_nsec = end.tv_nsec - start.tv_nsec;
	if (delta.tv_nsec < 0) {
		delta.tv_sec -= 1;
		delta.tv_nsec += 1000000000L;
	}

	return delta.tv_sec * 1000000000ULL + delta.tv_nsec;
}
static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
unsigned long a, unsigned long b, unsigned long *res)
{
struct timespec start, end;
unsigned long gcd_res;
start = read_time();
gcd_res = gcd(a, b);
end = read_time();
*res = gcd_res;
return diff_time(start, end);
}
#endif
/*
 * get_rand - build a pseudo-random unsigned long from rand().
 *
 * When long is 8 bytes wide, two rand() results are combined, the first
 * one in the upper 32 bits; otherwise a single rand() result is returned.
 *
 * The two rand() calls are made in separate statements.  Inside a single
 * expression such as "rand() << 32 | rand()" the order of the two calls is
 * unspecified, so the same seed would not be guaranteed to reproduce the
 * same value at every call site.
 */
static inline unsigned long get_rand(void)
{
	unsigned long hi, lo;

	if (sizeof(long) != 8)
		return rand();

	hi = rand();
	lo = rand();

	/* Two 16-bit shifts stay in range even where long is only 32 bits wide. */
	return hi << 16 << 16 | lo;
}
/*
 * main - benchmark every entry of gcd_func[] and cross-check the results.
 *
 * Options:
 *   -n loops    number of (a, b) pairs to test (default 100)
 *   -r repeats  timed calls per pair and implementation; only the fastest
 *               one is accumulated (default 1000)
 *   -s seed     seed passed to srand() (default: current time)
 * An optional non-option argument is used as b for every pair.
 *
 * The accumulated best times are printed to stdout.  Afterwards either
 * "PASS" or every pair on which some implementation disagrees with
 * gcd_func[0] is printed to stderr.
 *
 * Returns 0 once the benchmark has run, 1 on bad usage or if the result
 * table cannot be allocated.
 */
int main(int argc, char **argv)
{
	unsigned int seed = time(0);
	int loops = 100;
	int repeats = 1000;
	unsigned long (*res)[TEST_ENTRIES];
	unsigned long long elapsed[TEST_ENTRIES];
	unsigned long fixed_b = 0;
	int have_fixed_b;
	int failed = 0;
	size_t i;
	int j, k;

	for (;;) {
		int opt = getopt(argc, argv, "n:r:s:");

		/* End condition always first */
		if (opt == -1)
			break;

		switch (opt) {
		case 'n':
			loops = atoi(optarg);
			break;
		case 'r':
			repeats = atoi(optarg);
			break;
		case 's':
			seed = strtoul(optarg, NULL, 10);
			break;
		default:
			/* getopt() returns '?' for an unknown option or a missing argument. */
			fprintf(stderr, "Usage: %s [-n loops] [-r repeats] [-s seed] [b]\n", argv[0]);
			return 1;
		}
	}

	/*
	 * With loops < 1 nothing would be tested, and with repeats < 1 the
	 * min_elapsed[] values below would be read without ever being set.
	 */
	if (loops < 1 || repeats < 1) {
		fprintf(stderr, "%s: -n and -r must be positive integers\n", argv[0]);
		return 1;
	}

	/* Parse the optional fixed operand once instead of on every iteration. */
	have_fixed_b = argc > optind;
	if (have_fixed_b)
		fixed_b = strtoul(argv[optind], NULL, 10);

	/* One row of TEST_ENTRIES results per pair; calloc() checks the size for overflow. */
	res = calloc(loops, sizeof(*res));
	if (!res) {
		perror("calloc");
		return 1;
	}

	memset(elapsed, 0, sizeof(elapsed));
	srand(seed);
	for (j = 0; j < loops; j++) {
		unsigned long a = get_rand();
		unsigned long b = have_fixed_b ? fixed_b : get_rand();
		unsigned long long min_elapsed[TEST_ENTRIES];

		for (k = 0; k < repeats; k++) {
			for (i = 0; i < TEST_ENTRIES; i++) {
				unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);

				/* Keep the fastest of all repeats for this pair. */
				if (k == 0 || min_elapsed[i] > tmp)
					min_elapsed[i] = tmp;
			}
		}
		for (i = 0; i < TEST_ENTRIES; i++)
			elapsed[i] += min_elapsed[i];
	}

	for (i = 0; i < TEST_ENTRIES; i++)
		printf("gcd%zu: elapsed %llu\n", i, elapsed[i]);

	/* Re-seed to replay the same operand sequence for the error report. */
	srand(seed);
	for (j = 0; j < loops; j++) {
		unsigned long a = get_rand();
		unsigned long b = have_fixed_b ? fixed_b : get_rand();

		for (i = 1; i < TEST_ENTRIES; i++) {
			if (res[j][i] != res[j][0])
				break;
		}
		if (i == TEST_ENTRIES)
			continue;

		if (!failed) {
			failed = 1;
			fprintf(stderr, "Error:\n");
		}
		fprintf(stderr, "gcd(%lu, %lu): ", a, b);
		for (i = 0; i < TEST_ENTRIES; i++)
			fprintf(stderr, "%lu%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
	}

	if (!failed)
		fprintf(stderr, "PASS\n");

	free(res);
	return 0;
}
Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 10174
gcd1: elapsed 2120
gcd2: elapsed 2902
gcd3: elapsed 2039
gcd4: elapsed 2812
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9309
gcd1: elapsed 2280
gcd2: elapsed 2822
gcd3: elapsed 2217
gcd4: elapsed 2710
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9589
gcd1: elapsed 2098
gcd2: elapsed 2815
gcd3: elapsed 2030
gcd4: elapsed 2718
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9914
gcd1: elapsed 2309
gcd2: elapsed 2779
gcd3: elapsed 2228
gcd4: elapsed 2709
PASS
[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
413 lines
9.0 KiB
Plaintext
413 lines
9.0 KiB
Plaintext
config M32R
|
|
bool
|
|
default y
|
|
select HAVE_IDE
|
|
select HAVE_OPROFILE
|
|
select INIT_ALL_POSSIBLE
|
|
select HAVE_KERNEL_GZIP
|
|
select HAVE_KERNEL_BZIP2
|
|
select HAVE_KERNEL_LZMA
|
|
select ARCH_WANT_IPC_PARSE_VERSION
|
|
select HAVE_DEBUG_BUGVERBOSE
|
|
select VIRT_TO_BUS
|
|
select GENERIC_IRQ_PROBE
|
|
select GENERIC_IRQ_SHOW
|
|
select GENERIC_ATOMIC64
|
|
select ARCH_HAS_DEVMEM_IS_ALLOWED
|
|
select ARCH_USES_GETTIMEOFFSET
|
|
select MODULES_USE_ELF_RELA
|
|
select HAVE_DEBUG_STACKOVERFLOW
|
|
select CPU_NO_EFFICIENT_FFS
|
|
|
|
config SBUS
|
|
bool
|
|
|
|
config GENERIC_ISA_DMA
|
|
bool
|
|
default y
|
|
|
|
config ZONE_DMA
|
|
bool
|
|
default y
|
|
|
|
config NO_IOPORT_MAP
|
|
def_bool y
|
|
|
|
config NO_DMA
|
|
def_bool y
|
|
|
|
config HZ
|
|
int
|
|
default 100
|
|
|
|
source "init/Kconfig"
|
|
|
|
source "kernel/Kconfig.freezer"
|
|
|
|
|
|
menu "Processor type and features"
|
|
|
|
choice
|
|
prompt "Platform Type"
|
|
default PLAT_MAPPI
|
|
|
|
config PLAT_MAPPI
|
|
bool "Mappi-I"
|
|
help
|
|
The Mappi-I is an FPGA board for SOC (System-On-a-Chip) prototyping.
|
|
You can operate a Linux system on this board by using an M32R
|
|
softmacro core, which is a fully-synthesizable functional model
|
|
described in Verilog-HDL.
|
|
|
|
The Mappi-I board was the first platform, which had been used
|
|
to port and develop a Linux system for the M32R processor.
|
|
Currently, the Mappi-II, an heir to the Mappi-I, is available.
|
|
|
|
config PLAT_USRV
|
|
bool "uServer"
|
|
select PLAT_HAS_INT1ICU
|
|
|
|
config PLAT_M32700UT
|
|
bool "M32700UT"
|
|
select PLAT_HAS_INT0ICU
|
|
select PLAT_HAS_INT1ICU
|
|
select PLAT_HAS_INT2ICU
|
|
help
|
|
The M3T-M32700UT is an evaluation board based on uT-Engine
|
|
specification. This board has an M32700 (Chaos) evaluation chip.
|
|
You can say Y for SMP, because the M32700 is a single chip
|
|
multiprocessor.
|
|
|
|
config PLAT_OPSPUT
|
|
bool "OPSPUT"
|
|
select PLAT_HAS_INT0ICU
|
|
select PLAT_HAS_INT1ICU
|
|
select PLAT_HAS_INT2ICU
|
|
help
|
|
The OPSPUT is an evaluation board based on uT-Engine
|
|
specification. This board has an OPSP-REP chip.
|
|
|
|
config PLAT_OAKS32R
|
|
bool "OAKS32R"
|
|
help
|
|
The OAKS32R is a tiny, inexpensive evaluation board.
|
|
Please note that if you say Y here and choose chip "M32102",
|
|
say N for MMU and select a no-MMU version kernel, otherwise
|
|
a kernel with MMU support will not work, because the M32102
|
|
is a microcontroller for embedded systems and it has no MMU.
|
|
|
|
config PLAT_MAPPI2
|
|
bool "Mappi-II(M3A-ZA36/M3A-ZA52)"
|
|
|
|
config PLAT_MAPPI3
|
|
bool "Mappi-III(M3A-2170)"
|
|
|
|
config PLAT_M32104UT
|
|
bool "M32104UT"
|
|
select PLAT_HAS_INT1ICU
|
|
help
|
|
The M3T-M32104UT is a reference board based on uT-Engine
|
|
specification. This board has an M32104 chip.
|
|
|
|
endchoice
|
|
|
|
choice
|
|
prompt "Processor family"
|
|
default CHIP_M32700
|
|
|
|
config CHIP_M32700
|
|
bool "M32700 (Chaos)"
|
|
|
|
config CHIP_M32102
|
|
bool "M32102"
|
|
|
|
config CHIP_M32104
|
|
bool "M32104"
|
|
depends on PLAT_M32104UT
|
|
|
|
config CHIP_VDEC2
|
|
bool "VDEC2"
|
|
|
|
config CHIP_OPSP
|
|
bool "OPSP"
|
|
|
|
endchoice
|
|
|
|
config MMU
|
|
bool "Support for memory management hardware"
|
|
depends on CHIP_M32700 || CHIP_VDEC2 || CHIP_OPSP
|
|
default y
|
|
|
|
config TLB_ENTRIES
|
|
int "TLB Entries"
|
|
depends on CHIP_M32700 || CHIP_VDEC2 || CHIP_OPSP
|
|
default 32 if CHIP_M32700 || CHIP_OPSP
|
|
default 16 if CHIP_VDEC2
|
|
|
|
|
|
config ISA_M32R
|
|
bool
|
|
depends on CHIP_M32102 || CHIP_M32104
|
|
default y
|
|
|
|
config ISA_M32R2
|
|
bool
|
|
depends on CHIP_M32700 || CHIP_VDEC2 || CHIP_OPSP
|
|
default y
|
|
|
|
config ISA_DSP_LEVEL2
|
|
bool
|
|
depends on CHIP_M32700 || CHIP_OPSP
|
|
default y
|
|
|
|
config ISA_DUAL_ISSUE
|
|
bool
|
|
depends on CHIP_M32700 || CHIP_OPSP
|
|
default y
|
|
|
|
config PLAT_HAS_INT0ICU
|
|
bool
|
|
default n
|
|
|
|
config PLAT_HAS_INT1ICU
|
|
bool
|
|
default n
|
|
|
|
config PLAT_HAS_INT2ICU
|
|
bool
|
|
default n
|
|
|
|
config BUS_CLOCK
|
|
int "Bus Clock [Hz] (integer)"
|
|
default "70000000" if PLAT_MAPPI
|
|
default "25000000" if PLAT_USRV
|
|
default "50000000" if PLAT_MAPPI3
|
|
default "50000000" if PLAT_M32700UT
|
|
default "50000000" if PLAT_OPSPUT
|
|
default "54000000" if PLAT_M32104UT
|
|
default "33333333" if PLAT_OAKS32R
|
|
default "20000000" if PLAT_MAPPI2
|
|
|
|
config TIMER_DIVIDE
|
|
int "Timer divider (integer)"
|
|
default "128"
|
|
|
|
config CPU_LITTLE_ENDIAN
|
|
bool "Generate little endian code"
|
|
default n
|
|
|
|
config MEMORY_START
|
|
hex "Physical memory start address (hex)"
|
|
default "08000000" if PLAT_MAPPI || PLAT_MAPPI2 || PLAT_MAPPI3
|
|
default "08000000" if PLAT_USRV
|
|
default "08000000" if PLAT_M32700UT
|
|
default "08000000" if PLAT_OPSPUT
|
|
default "04000000" if PLAT_M32104UT
|
|
default "01000000" if PLAT_OAKS32R
|
|
|
|
config MEMORY_SIZE
|
|
hex "Physical memory size (hex)"
|
|
default "08000000" if PLAT_MAPPI3
|
|
default "04000000" if PLAT_MAPPI || PLAT_MAPPI2
|
|
default "02000000" if PLAT_USRV
|
|
default "01000000" if PLAT_M32700UT
|
|
default "01000000" if PLAT_OPSPUT
|
|
default "01000000" if PLAT_M32104UT
|
|
default "00800000" if PLAT_OAKS32R
|
|
|
|
config ARCH_DISCONTIGMEM_ENABLE
|
|
bool "Internal RAM Support"
|
|
depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104
|
|
default y
|
|
|
|
source "mm/Kconfig"
|
|
|
|
config IRAM_START
|
|
hex "Internal memory start address (hex)"
|
|
default "00f00000" if !CHIP_M32104
|
|
default "00700000" if CHIP_M32104
|
|
depends on (CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104) && DISCONTIGMEM
|
|
|
|
config IRAM_SIZE
|
|
hex "Internal memory size (hex)"
|
|
depends on (CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104) && DISCONTIGMEM
|
|
default "00080000" if CHIP_M32700
|
|
default "00010000" if CHIP_M32102 || CHIP_OPSP || CHIP_M32104
|
|
default "00008000" if CHIP_VDEC2
|
|
|
|
#
|
|
# Define implied options from the CPU selection here
|
|
#
|
|
|
|
config GENERIC_LOCKBREAK
|
|
bool
|
|
default y
|
|
depends on SMP && PREEMPT
|
|
|
|
config RWSEM_GENERIC_SPINLOCK
|
|
bool
|
|
depends on M32R
|
|
default y
|
|
|
|
config RWSEM_XCHGADD_ALGORITHM
|
|
bool
|
|
default n
|
|
|
|
config ARCH_HAS_ILOG2_U32
|
|
bool
|
|
default n
|
|
|
|
config ARCH_HAS_ILOG2_U64
|
|
bool
|
|
default n
|
|
|
|
config GENERIC_HWEIGHT
|
|
bool
|
|
default y
|
|
|
|
config GENERIC_CALIBRATE_DELAY
|
|
bool
|
|
default y
|
|
|
|
config SCHED_OMIT_FRAME_POINTER
|
|
bool
|
|
default y
|
|
|
|
source "kernel/Kconfig.preempt"
|
|
|
|
config SMP
|
|
bool "Symmetric multi-processing support"
|
|
depends on MMU
|
|
---help---
|
|
This enables support for systems with more than one CPU. If you have
|
|
a system with only one CPU, say N. If you have a system with more
|
|
than one CPU, say Y.
|
|
|
|
If you say N here, the kernel will run on uni- and multiprocessor
|
|
machines, but will use only one CPU of a multiprocessor machine. If
|
|
you say Y here, the kernel will run on many, but not all,
|
|
uniprocessor machines. On a uniprocessor machine, the kernel
|
|
will run faster if you say N here.
|
|
|
|
People using multiprocessor machines who say Y here should also say
|
|
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
|
|
Management" code will be disabled if you say Y here.
|
|
|
|
See also the SMP-HOWTO available at
|
|
<http://tldp.org/HOWTO/SMP-HOWTO.html>.
|
|
|
|
If you don't know what to do here, say N.
|
|
|
|
config CHIP_M32700_TS1
|
|
bool "Workaround code for the M32700 TS1 chip's bug"
|
|
depends on (CHIP_M32700 && SMP)
|
|
default n
|
|
|
|
config NR_CPUS
|
|
int "Maximum number of CPUs (2-32)"
|
|
range 2 32
|
|
depends on SMP
|
|
default "2"
|
|
help
|
|
This allows you to specify the maximum number of CPUs which this
|
|
kernel will support. The maximum supported value is 32 and the
|
|
minimum value which makes sense is 2.
|
|
|
|
This is purely to save memory - each supported CPU adds
|
|
approximately eight kilobytes to the kernel image.
|
|
|
|
# Common NUMA Features
|
|
config NUMA
|
|
bool "Numa Memory Allocation Support"
|
|
depends on SMP && BROKEN
|
|
default n
|
|
|
|
config NODES_SHIFT
|
|
int
|
|
default "1"
|
|
depends on NEED_MULTIPLE_NODES
|
|
|
|
endmenu
|
|
|
|
|
|
menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
|
|
|
|
config PCI
|
|
bool "PCI support"
|
|
depends on BROKEN
|
|
default n
|
|
help
|
|
Find out whether you have a PCI motherboard. PCI is the name of a
|
|
bus system, i.e. the way the CPU talks to the other stuff inside
|
|
your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
|
|
VESA. If you have PCI, say Y, otherwise N.
|
|
|
|
choice
|
|
prompt "PCI access mode"
|
|
depends on PCI
|
|
default PCI_GOANY
|
|
|
|
config PCI_GOBIOS
|
|
bool "BIOS"
|
|
---help---
|
|
On PCI systems, the BIOS can be used to detect the PCI devices and
|
|
determine their configuration. However, some old PCI motherboards
|
|
have BIOS bugs and may crash if this is done. Also, some embedded
|
|
PCI-based systems don't have any BIOS at all. Linux can also try to
|
|
detect the PCI hardware directly without using the BIOS.
|
|
|
|
With this option, you can specify how Linux should detect the PCI
|
|
devices. If you choose "BIOS", the BIOS will be used, if you choose
|
|
"Direct", the BIOS won't be used, and if you choose "Any", the
|
|
kernel will try the direct access method and fall back to the BIOS
|
|
if that doesn't work. If unsure, go with the default, which is
|
|
"Any".
|
|
|
|
config PCI_GODIRECT
|
|
bool "Direct"
|
|
|
|
config PCI_GOANY
|
|
bool "Any"
|
|
|
|
endchoice
|
|
|
|
config PCI_BIOS
|
|
bool
|
|
depends on PCI && (PCI_GOBIOS || PCI_GOANY)
|
|
default y
|
|
|
|
config PCI_DIRECT
|
|
bool
|
|
depends on PCI && (PCI_GODIRECT || PCI_GOANY)
|
|
default y
|
|
|
|
source "drivers/pci/Kconfig"
|
|
|
|
config ISA
|
|
bool
|
|
|
|
source "drivers/pcmcia/Kconfig"
|
|
|
|
endmenu
|
|
|
|
|
|
menu "Executable file formats"
|
|
|
|
source "fs/Kconfig.binfmt"
|
|
|
|
endmenu
|
|
|
|
source "net/Kconfig"
|
|
|
|
source "drivers/Kconfig"
|
|
|
|
source "fs/Kconfig"
|
|
|
|
source "arch/m32r/Kconfig.debug"
|
|
|
|
source "security/Kconfig"
|
|
|
|
source "crypto/Kconfig"
|
|
|
|
source "lib/Kconfig"
|