mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 1ddf0b1b11
			
		
	
	
		1ddf0b1b11
		
	
	
	
	
		
			
			In Linux 3.18 and below, GCC hoists the lsl instructions in the pvclock code all the way to the beginning of __vdso_clock_gettime, slowing the non-paravirt case significantly. For unknown reasons, presumably related to the removal of a branch, the performance issue is gone as of e76b027e64 ("x86,vdso: Use LSL unconditionally for vgetcpu"), but I don't trust GCC enough to expect the problem to stay fixed. There should be no correctness issue, because the __getcpu calls in __vdso_clock_gettime were never necessary in the first place. Note to stable maintainers: In 3.18 and below, depending on configuration, gcc 4.9.2 generates code like this: 9c3: 44 0f 03 e8 lsl %ax,%r13d; 9c7: 45 89 eb mov %r13d,%r11d; 9ca: 0f 03 d8 lsl %ax,%ebx. This patch won't apply as is to any released kernel, but I'll send a trivial backported version if needed. Fixes: 51c19b4f59 ("x86: vdso: pvclock gettime support") Cc: stable@vger.kernel.org # 3.8+ Cc: Marcelo Tosatti <mtosatti@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
		
			
				
	
	
		
			95 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			95 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef _ASM_X86_VGTOD_H
 | |
| #define _ASM_X86_VGTOD_H
 | |
| 
 | |
| #include <linux/compiler.h>
 | |
| #include <linux/clocksource.h>
 | |
| 
 | |
/*
 * Integer type used for the seconds/nanoseconds fields of
 * struct vsyscall_gtod_data: a fixed 64-bit type when BUILD_VDSO32_64 is
 * defined, the native unsigned long otherwise.
 * NOTE(review): presumably this keeps the structure layout identical for
 * 32-bit vDSO code running on a 64-bit kernel -- confirm against the
 * vDSO build.
 */
#if defined(BUILD_VDSO32_64)
typedef u64 gtod_long_t;
#else
typedef unsigned long gtod_long_t;
#endif
 | |
| /*
 | |
|  * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
 | |
|  * so be carefull by modifying this structure.
 | |
|  */
 | |
| struct vsyscall_gtod_data {
 | |
| 	unsigned seq;
 | |
| 
 | |
| 	int vclock_mode;
 | |
| 	cycle_t	cycle_last;
 | |
| 	cycle_t	mask;
 | |
| 	u32	mult;
 | |
| 	u32	shift;
 | |
| 
 | |
| 	/* open coded 'struct timespec' */
 | |
| 	u64		wall_time_snsec;
 | |
| 	gtod_long_t	wall_time_sec;
 | |
| 	gtod_long_t	monotonic_time_sec;
 | |
| 	u64		monotonic_time_snsec;
 | |
| 	gtod_long_t	wall_time_coarse_sec;
 | |
| 	gtod_long_t	wall_time_coarse_nsec;
 | |
| 	gtod_long_t	monotonic_time_coarse_sec;
 | |
| 	gtod_long_t	monotonic_time_coarse_nsec;
 | |
| 
 | |
| 	int		tz_minuteswest;
 | |
| 	int		tz_dsttime;
 | |
| };
 | |
| extern struct vsyscall_gtod_data vsyscall_gtod_data;
 | |
| 
 | |
| static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
 | |
| {
 | |
| 	unsigned ret;
 | |
| 
 | |
| repeat:
 | |
| 	ret = ACCESS_ONCE(s->seq);
 | |
| 	if (unlikely(ret & 1)) {
 | |
| 		cpu_relax();
 | |
| 		goto repeat;
 | |
| 	}
 | |
| 	smp_rmb();
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
 | |
| 					unsigned start)
 | |
| {
 | |
| 	smp_rmb();
 | |
| 	return unlikely(s->seq != start);
 | |
| }
 | |
| 
 | |
| static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
 | |
| {
 | |
| 	++s->seq;
 | |
| 	smp_wmb();
 | |
| }
 | |
| 
 | |
| static inline void gtod_write_end(struct vsyscall_gtod_data *s)
 | |
| {
 | |
| 	smp_wmb();
 | |
| 	++s->seq;
 | |
| }
 | |
| 
 | |
#ifdef CONFIG_X86_64

/*
 * NOTE(review): presumably the bits of the __getcpu() result that hold
 * the CPU number -- confirm against the callers.
 */
#define VGETCPU_CPU_MASK 0xfff

/*
 * Return the per-CPU value stored in the GDT entry selected by
 * __PER_CPU_SEG, fetched with the LSL instruction.
 */
static inline unsigned int __getcpu(void)
{
	unsigned int limit;

	/*
	 * LSL is used to load the per-CPU data from the GDT because it is
	 * faster than RDTSCP and works on all CPUs.  The asm is volatile
	 * for two reasons: so that it is ordered correctly with respect to
	 * barrier(), and so that gcc does not cleverly hoist it out of the
	 * calling function.
	 */
	asm volatile ("lsl %1,%0"
		      : "=r" (limit)
		      : "r" (__PER_CPU_SEG));

	return limit;
}

#endif /* CONFIG_X86_64 */
 | |
| 
 | |
| #endif /* _ASM_X86_VGTOD_H */
 |