mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	perf/x86: Add option to disable reading branch flags/cycles
With LBRv5, reading the extra LBR flags like mispredict, TSX, and cycles is not free anymore, as they have moved to a separate MSR. For callstack mode we don't need any of this information, so we can avoid the unnecessary MSR read. Add flags to the perf interface so that perf record can request not collecting this information. Add branch_sample_type flags for CYCLES and FLAGS. It's a bit unusual for branch_sample_types to be negative (disable), not positive (enable), but since the legacy ABI reported the flags, we need some form of explicit disabling to avoid breaking the ABI. Once we have the flags, the x86 perf code can keep track of whether any users need them. If no one needs it, the information is not collected. This cuts down the cost of LBR callstack on Skylake significantly. Profiling a kernel build with LBR call stack, the average run time of the PMI handler drops by 43%. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1445366797-30894-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									75925e1ad7
								
							
						
					
					
						commit
						b16a5b52eb
					
				| @ -42,6 +42,13 @@ static enum { | |||||||
| #define LBR_FAR_BIT		8 /* do not capture far branches */ | #define LBR_FAR_BIT		8 /* do not capture far branches */ | ||||||
| #define LBR_CALL_STACK_BIT	9 /* enable call stack */ | #define LBR_CALL_STACK_BIT	9 /* enable call stack */ | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Following bit only exists in Linux; we mask it out before writing it to | ||||||
|  |  * the actual MSR. But it helps the constraint perf code to understand | ||||||
|  |  * that this is a separate configuration. | ||||||
|  |  */ | ||||||
|  | #define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */ | ||||||
|  | 
 | ||||||
| #define LBR_KERNEL	(1 << LBR_KERNEL_BIT) | #define LBR_KERNEL	(1 << LBR_KERNEL_BIT) | ||||||
| #define LBR_USER	(1 << LBR_USER_BIT) | #define LBR_USER	(1 << LBR_USER_BIT) | ||||||
| #define LBR_JCC		(1 << LBR_JCC_BIT) | #define LBR_JCC		(1 << LBR_JCC_BIT) | ||||||
| @ -52,6 +59,7 @@ static enum { | |||||||
| #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT) | #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT) | ||||||
| #define LBR_FAR		(1 << LBR_FAR_BIT) | #define LBR_FAR		(1 << LBR_FAR_BIT) | ||||||
| #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT) | #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT) | ||||||
|  | #define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT) | ||||||
| 
 | 
 | ||||||
| #define LBR_PLM (LBR_KERNEL | LBR_USER) | #define LBR_PLM (LBR_KERNEL | LBR_USER) | ||||||
| 
 | 
 | ||||||
| @ -152,7 +160,7 @@ static void __intel_pmu_lbr_enable(bool pmi) | |||||||
| 	 * did not change. | 	 * did not change. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (cpuc->lbr_sel) | 	if (cpuc->lbr_sel) | ||||||
| 		lbr_select = cpuc->lbr_sel->config; | 		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; | ||||||
| 	if (!pmi) | 	if (!pmi) | ||||||
| 		wrmsrl(MSR_LBR_SELECT, lbr_select); | 		wrmsrl(MSR_LBR_SELECT, lbr_select); | ||||||
| 
 | 
 | ||||||
| @ -422,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | |||||||
|  */ |  */ | ||||||
| static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||||||
| { | { | ||||||
|  | 	bool need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); | ||||||
| 	unsigned long mask = x86_pmu.lbr_nr - 1; | 	unsigned long mask = x86_pmu.lbr_nr - 1; | ||||||
| 	int lbr_format = x86_pmu.intel_cap.lbr_format; | 	int lbr_format = x86_pmu.intel_cap.lbr_format; | ||||||
| 	u64 tos = intel_pmu_lbr_tos(); | 	u64 tos = intel_pmu_lbr_tos(); | ||||||
| @ -442,7 +451,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||||||
| 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||||||
| 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to); | 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to); | ||||||
| 
 | 
 | ||||||
| 		if (lbr_format == LBR_FORMAT_INFO) { | 		if (lbr_format == LBR_FORMAT_INFO && need_info) { | ||||||
| 			u64 info; | 			u64 info; | ||||||
| 
 | 
 | ||||||
| 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info); | 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info); | ||||||
| @ -590,6 +599,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | |||||||
| 		if (v != LBR_IGN) | 		if (v != LBR_IGN) | ||||||
| 			mask |= v; | 			mask |= v; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	reg = &event->hw.branch_reg; | 	reg = &event->hw.branch_reg; | ||||||
| 	reg->idx = EXTRA_REG_LBR; | 	reg->idx = EXTRA_REG_LBR; | ||||||
| 
 | 
 | ||||||
| @ -600,6 +610,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | |||||||
| 	 */ | 	 */ | ||||||
| 	reg->config = mask ^ x86_pmu.lbr_sel_mask; | 	reg->config = mask ^ x86_pmu.lbr_sel_mask; | ||||||
| 
 | 
 | ||||||
|  | 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && | ||||||
|  | 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && | ||||||
|  | 	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) | ||||||
|  | 		reg->config |= LBR_NO_INFO; | ||||||
|  | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -171,6 +171,9 @@ enum perf_branch_sample_type_shift { | |||||||
| 	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */ | 	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */ | ||||||
| 	PERF_SAMPLE_BRANCH_CALL_SHIFT		= 13, /* direct call */ | 	PERF_SAMPLE_BRANCH_CALL_SHIFT		= 13, /* direct call */ | ||||||
| 
 | 
 | ||||||
|  | 	PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT	= 14, /* no flags */ | ||||||
|  | 	PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT	= 15, /* no cycles */ | ||||||
|  | 
 | ||||||
| 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */ | 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| @ -192,6 +195,9 @@ enum perf_branch_sample_type { | |||||||
| 	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, | 	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, | ||||||
| 	PERF_SAMPLE_BRANCH_CALL		= 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, | 	PERF_SAMPLE_BRANCH_CALL		= 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, | ||||||
| 
 | 
 | ||||||
|  | 	PERF_SAMPLE_BRANCH_NO_FLAGS	= 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, | ||||||
|  | 	PERF_SAMPLE_BRANCH_NO_CYCLES	= 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, | ||||||
|  | 
 | ||||||
| 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, | 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Andi Kleen
						Andi Kleen