mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 f5967101e9
			
		
	
	
		f5967101e9
		
	
	
	
	
		
			
			People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench
into kcov, LTO, and similar experiments.
Add asm versions for __sw_hweight{32,64}() and do explicit saving and
restoring of clobbered registers. This gets rid of the special calling
convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs.
We still need to hardcode POPCNT and its register operands because some old
gas versions that we support do not know about POPCNT.
Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives
can do padding now.
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
		
	
			
		
			
				
	
	
		
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include <linux/export.h>
 | |
| #include <linux/bitops.h>
 | |
| #include <asm/types.h>
 | |
| 
 | |
/**
 * hweightN - returns the Hamming weight of an N-bit word
 * @x: the word to weigh
 *
 * The Hamming Weight of a number is the total number of bits set in it.
 */
 | |
| 
 | |
| #ifndef __HAVE_ARCH_SW_HWEIGHT
 | |
/*
 * __sw_hweight32 - software population count of a 32-bit word
 * @w: the word to weigh
 *
 * Counts the set bits in @w by summing neighbouring bit fields of doubling
 * width (1 -> 2 -> 4 -> 8 bits) and then adding up the per-byte counts.
 * Both preprocessor branches compute the same value; they differ only in
 * how the per-byte counts are combined.
 *
 * Return: the number of set bits in @w.
 */
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
	/* Each 2-bit field becomes the count of set bits it contained. */
	w -= (w >> 1) & 0x55555555;
	/* Add neighbouring 2-bit counts into 4-bit fields. */
	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
	/* Add neighbouring nibbles; every byte now holds its own bit count. */
	w =  (w + (w >> 4)) & 0x0f0f0f0f;
	/* The multiply accumulates all four byte counts in the top byte. */
	return (w * 0x01010101) >> 24;
#else
	/* Each 2-bit field becomes the count of set bits it contained. */
	unsigned int res = w - ((w >> 1) & 0x55555555);

	/* Add neighbouring 2-bit counts into 4-bit fields. */
	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
	/* Add neighbouring nibbles; every byte now holds its own bit count. */
	res = (res + (res >> 4)) & 0x0F0F0F0F;
	/* Fold the byte counts together with shifts instead of a multiply. */
	res = res + (res >> 8);
	/* The total lands in the low byte; mask off the partial sums above. */
	return (res + (res >> 16)) & 0x000000FF;
#endif
}
 | |
| EXPORT_SYMBOL(__sw_hweight32);
 | |
| #endif
 | |
| 
 | |
/*
 * __sw_hweight16 - software population count of a 16-bit word
 * @w: the word to weigh; only bits 0..15 contribute to the result
 *
 * The masks confine the computation to the low 16 bits, so any higher bits
 * set in @w are ignored.
 *
 * Return: the number of set bits in the low 16 bits of @w (0..16).
 */
unsigned int __sw_hweight16(unsigned int w)
{
	/* Each 2-bit field becomes the count of set bits it contained. */
	unsigned int res = w - ((w >> 1) & 0x5555);

	/* Add neighbouring 2-bit counts into 4-bit fields. */
	res = (res & 0x3333) + ((res >> 2) & 0x3333);
	/* Add neighbouring nibbles; each of the two bytes holds its count. */
	res = (res + (res >> 4)) & 0x0F0F;
	/* Add the two byte counts; the total fits in the low byte. */
	return (res + (res >> 8)) & 0x00FF;
}
 | |
| EXPORT_SYMBOL(__sw_hweight16);
 | |
| 
 | |
/*
 * __sw_hweight8 - software population count of an 8-bit word
 * @w: the word to weigh; only bits 0..7 contribute to the result
 *
 * The masks confine the computation to the low 8 bits, so any higher bits
 * set in @w are ignored.
 *
 * Return: the number of set bits in the low 8 bits of @w (0..8).
 */
unsigned int __sw_hweight8(unsigned int w)
{
	/* Each 2-bit field becomes the count of set bits it contained. */
	unsigned int res = w - ((w >> 1) & 0x55);

	/* Add neighbouring 2-bit counts into the two nibbles. */
	res = (res & 0x33) + ((res >> 2) & 0x33);
	/* Add the two nibble counts; the maximum of 8 still fits in 4 bits. */
	return (res + (res >> 4)) & 0x0F;
}
 | |
| EXPORT_SYMBOL(__sw_hweight8);
 | |
| 
 | |
| #ifndef __HAVE_ARCH_SW_HWEIGHT
 | |
| unsigned long __sw_hweight64(__u64 w)
 | |
| {
 | |
| #if BITS_PER_LONG == 32
 | |
| 	return __sw_hweight32((unsigned int)(w >> 32)) +
 | |
| 	       __sw_hweight32((unsigned int)w);
 | |
| #elif BITS_PER_LONG == 64
 | |
| #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
 | |
| 	w -= (w >> 1) & 0x5555555555555555ul;
 | |
| 	w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
 | |
| 	w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
 | |
| 	return (w * 0x0101010101010101ul) >> 56;
 | |
| #else
 | |
| 	__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
 | |
| 	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
 | |
| 	res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
 | |
| 	res = res + (res >> 8);
 | |
| 	res = res + (res >> 16);
 | |
| 	return (res + (res >> 32)) & 0x00000000000000FFul;
 | |
| #endif
 | |
| #endif
 | |
| }
 | |
| EXPORT_SYMBOL(__sw_hweight64);
 | |
| #endif
 |