mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	include/uapi/linux/byteorder, swab: force inlining of some byteswap operations
Sometimes gcc mysteriously doesn't inline
very small functions we expect to be inlined. See
    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122
With this .config:
http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os,
the following functions get deinlined many times, i.e. gcc emits them as
separate out-of-line copies that are reached via real calls.
Examples of disassembly:
<get_unaligned_be16> (12 copies, 51 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq
<get_unaligned_be32> (12 copies, 135 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq
<get_unaligned_be64> (2 copies, 20 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq
<__swab16p> (16 copies, 146 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       86 e0                   xchg   %ah,%al
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq
<__swab32p> (43 copies, ~560 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       0f c8                   bswap  %eax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq
<__swab64p> (21 copies, 119 calls):
       55                      push   %rbp
       48 89 f8                mov    %rdi,%rax
       48 0f c8                bswap  %rax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq
<__swab32s> (6 copies, 47 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       89 07                   mov    %eax,(%rdi)
       5d                      pop    %rbp
       c3                      retq
This patch fixes this by marking these functions __always_inline instead
of inline (s/inline/__always_inline/).
Code size decrease after the patch is ~4.5k (4529 bytes of text):
    text     data      bss       dec     hex filename
92202377 20826112 36417536 149446025 8e85d89 vmlinux
92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									a644fdf029
								
							
						
					
					
						commit
						bc27fb68aa
					
				| @ -40,51 +40,51 @@ | ||||
| #define __cpu_to_be16(x) ((__force __be16)(__u16)(x)) | ||||
| #define __be16_to_cpu(x) ((__force __u16)(__be16)(x)) | ||||
| 
 | ||||
| static inline __le64 __cpu_to_le64p(const __u64 *p) | ||||
| static __always_inline __le64 __cpu_to_le64p(const __u64 *p) | ||||
| { | ||||
| 	return (__force __le64)__swab64p(p); | ||||
| } | ||||
| static inline __u64 __le64_to_cpup(const __le64 *p) | ||||
| static __always_inline __u64 __le64_to_cpup(const __le64 *p) | ||||
| { | ||||
| 	return __swab64p((__u64 *)p); | ||||
| } | ||||
| static inline __le32 __cpu_to_le32p(const __u32 *p) | ||||
| static __always_inline __le32 __cpu_to_le32p(const __u32 *p) | ||||
| { | ||||
| 	return (__force __le32)__swab32p(p); | ||||
| } | ||||
| static inline __u32 __le32_to_cpup(const __le32 *p) | ||||
| static __always_inline __u32 __le32_to_cpup(const __le32 *p) | ||||
| { | ||||
| 	return __swab32p((__u32 *)p); | ||||
| } | ||||
| static inline __le16 __cpu_to_le16p(const __u16 *p) | ||||
| static __always_inline __le16 __cpu_to_le16p(const __u16 *p) | ||||
| { | ||||
| 	return (__force __le16)__swab16p(p); | ||||
| } | ||||
| static inline __u16 __le16_to_cpup(const __le16 *p) | ||||
| static __always_inline __u16 __le16_to_cpup(const __le16 *p) | ||||
| { | ||||
| 	return __swab16p((__u16 *)p); | ||||
| } | ||||
| static inline __be64 __cpu_to_be64p(const __u64 *p) | ||||
| static __always_inline __be64 __cpu_to_be64p(const __u64 *p) | ||||
| { | ||||
| 	return (__force __be64)*p; | ||||
| } | ||||
| static inline __u64 __be64_to_cpup(const __be64 *p) | ||||
| static __always_inline __u64 __be64_to_cpup(const __be64 *p) | ||||
| { | ||||
| 	return (__force __u64)*p; | ||||
| } | ||||
| static inline __be32 __cpu_to_be32p(const __u32 *p) | ||||
| static __always_inline __be32 __cpu_to_be32p(const __u32 *p) | ||||
| { | ||||
| 	return (__force __be32)*p; | ||||
| } | ||||
| static inline __u32 __be32_to_cpup(const __be32 *p) | ||||
| static __always_inline __u32 __be32_to_cpup(const __be32 *p) | ||||
| { | ||||
| 	return (__force __u32)*p; | ||||
| } | ||||
| static inline __be16 __cpu_to_be16p(const __u16 *p) | ||||
| static __always_inline __be16 __cpu_to_be16p(const __u16 *p) | ||||
| { | ||||
| 	return (__force __be16)*p; | ||||
| } | ||||
| static inline __u16 __be16_to_cpup(const __be16 *p) | ||||
| static __always_inline __u16 __be16_to_cpup(const __be16 *p) | ||||
| { | ||||
| 	return (__force __u16)*p; | ||||
| } | ||||
|  | ||||
| @ -40,51 +40,51 @@ | ||||
| #define __cpu_to_be16(x) ((__force __be16)__swab16((x))) | ||||
| #define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x)) | ||||
| 
 | ||||
| static inline __le64 __cpu_to_le64p(const __u64 *p) | ||||
| static __always_inline __le64 __cpu_to_le64p(const __u64 *p) | ||||
| { | ||||
| 	return (__force __le64)*p; | ||||
| } | ||||
| static inline __u64 __le64_to_cpup(const __le64 *p) | ||||
| static __always_inline __u64 __le64_to_cpup(const __le64 *p) | ||||
| { | ||||
| 	return (__force __u64)*p; | ||||
| } | ||||
| static inline __le32 __cpu_to_le32p(const __u32 *p) | ||||
| static __always_inline __le32 __cpu_to_le32p(const __u32 *p) | ||||
| { | ||||
| 	return (__force __le32)*p; | ||||
| } | ||||
| static inline __u32 __le32_to_cpup(const __le32 *p) | ||||
| static __always_inline __u32 __le32_to_cpup(const __le32 *p) | ||||
| { | ||||
| 	return (__force __u32)*p; | ||||
| } | ||||
| static inline __le16 __cpu_to_le16p(const __u16 *p) | ||||
| static __always_inline __le16 __cpu_to_le16p(const __u16 *p) | ||||
| { | ||||
| 	return (__force __le16)*p; | ||||
| } | ||||
| static inline __u16 __le16_to_cpup(const __le16 *p) | ||||
| static __always_inline __u16 __le16_to_cpup(const __le16 *p) | ||||
| { | ||||
| 	return (__force __u16)*p; | ||||
| } | ||||
| static inline __be64 __cpu_to_be64p(const __u64 *p) | ||||
| static __always_inline __be64 __cpu_to_be64p(const __u64 *p) | ||||
| { | ||||
| 	return (__force __be64)__swab64p(p); | ||||
| } | ||||
| static inline __u64 __be64_to_cpup(const __be64 *p) | ||||
| static __always_inline __u64 __be64_to_cpup(const __be64 *p) | ||||
| { | ||||
| 	return __swab64p((__u64 *)p); | ||||
| } | ||||
| static inline __be32 __cpu_to_be32p(const __u32 *p) | ||||
| static __always_inline __be32 __cpu_to_be32p(const __u32 *p) | ||||
| { | ||||
| 	return (__force __be32)__swab32p(p); | ||||
| } | ||||
| static inline __u32 __be32_to_cpup(const __be32 *p) | ||||
| static __always_inline __u32 __be32_to_cpup(const __be32 *p) | ||||
| { | ||||
| 	return __swab32p((__u32 *)p); | ||||
| } | ||||
| static inline __be16 __cpu_to_be16p(const __u16 *p) | ||||
| static __always_inline __be16 __cpu_to_be16p(const __u16 *p) | ||||
| { | ||||
| 	return (__force __be16)__swab16p(p); | ||||
| } | ||||
| static inline __u16 __be16_to_cpup(const __be16 *p) | ||||
| static __always_inline __u16 __be16_to_cpup(const __be16 *p) | ||||
| { | ||||
| 	return __swab16p((__u16 *)p); | ||||
| } | ||||
|  | ||||
| @ -151,7 +151,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) | ||||
|  * __swab16p - return a byteswapped 16-bit value from a pointer | ||||
|  * @p: pointer to a naturally-aligned 16-bit value | ||||
|  */ | ||||
| static inline __u16 __swab16p(const __u16 *p) | ||||
| static __always_inline __u16 __swab16p(const __u16 *p) | ||||
| { | ||||
| #ifdef __arch_swab16p | ||||
| 	return __arch_swab16p(p); | ||||
| @ -164,7 +164,7 @@ static inline __u16 __swab16p(const __u16 *p) | ||||
|  * __swab32p - return a byteswapped 32-bit value from a pointer | ||||
|  * @p: pointer to a naturally-aligned 32-bit value | ||||
|  */ | ||||
| static inline __u32 __swab32p(const __u32 *p) | ||||
| static __always_inline __u32 __swab32p(const __u32 *p) | ||||
| { | ||||
| #ifdef __arch_swab32p | ||||
| 	return __arch_swab32p(p); | ||||
| @ -177,7 +177,7 @@ static inline __u32 __swab32p(const __u32 *p) | ||||
|  * __swab64p - return a byteswapped 64-bit value from a pointer | ||||
|  * @p: pointer to a naturally-aligned 64-bit value | ||||
|  */ | ||||
| static inline __u64 __swab64p(const __u64 *p) | ||||
| static __always_inline __u64 __swab64p(const __u64 *p) | ||||
| { | ||||
| #ifdef __arch_swab64p | ||||
| 	return __arch_swab64p(p); | ||||
| @ -232,7 +232,7 @@ static inline void __swab16s(__u16 *p) | ||||
|  * __swab32s - byteswap a 32-bit value in-place | ||||
|  * @p: pointer to a naturally-aligned 32-bit value | ||||
|  */ | ||||
| static inline void __swab32s(__u32 *p) | ||||
| static __always_inline void __swab32s(__u32 *p) | ||||
| { | ||||
| #ifdef __arch_swab32s | ||||
| 	__arch_swab32s(p); | ||||
| @ -245,7 +245,7 @@ static inline void __swab32s(__u32 *p) | ||||
|  * __swab64s - byteswap a 64-bit value in-place | ||||
|  * @p: pointer to a naturally-aligned 64-bit value | ||||
|  */ | ||||
| static inline void __swab64s(__u64 *p) | ||||
| static __always_inline void __swab64s(__u64 *p) | ||||
| { | ||||
| #ifdef __arch_swab64s | ||||
| 	__arch_swab64s(p); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Denys Vlasenko
						Denys Vlasenko