mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	perf/x86: Reduce stack usage of x86_schedule_events()
x86_schedule_events() caches event constraints on the stack during scheduling. Given the number of possible events, this is 512 bytes of stack; since it can be invoked under schedule() under god-knows-what, this is causing stack blowouts.

Trade some space usage for stack safety: add a place to cache the constraint pointer to struct perf_event. For 8 bytes per event (1% of its size) we can save the giant stack frame.

This shouldn't change any aspect of scheduling whatsoever and while in theory the locality's a tiny bit worse, I doubt we'll see any performance impact either.

Tested: `perf stat whatever` does not blow up and produces results that aren't hugely obviously wrong. I'm not sure how to run particularly good tests of perf code, but this should not produce any functional change whatsoever.

Signed-off-by: Andrew Hunter <ahh@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1369332423-4400-1-git-send-email-ahh@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									03d8e80beb
								
							
						
					
					
						commit
						43b4578071
					
				| @ -568,7 +568,7 @@ struct sched_state { | |||||||
| struct perf_sched { | struct perf_sched { | ||||||
| 	int			max_weight; | 	int			max_weight; | ||||||
| 	int			max_events; | 	int			max_events; | ||||||
| 	struct event_constraint	**constraints; | 	struct perf_event	**events; | ||||||
| 	struct sched_state	state; | 	struct sched_state	state; | ||||||
| 	int			saved_states; | 	int			saved_states; | ||||||
| 	struct sched_state	saved[SCHED_STATES_MAX]; | 	struct sched_state	saved[SCHED_STATES_MAX]; | ||||||
| @ -577,7 +577,7 @@ struct perf_sched { | |||||||
| /*
 | /*
 | ||||||
|  * Initialize interator that runs through all events and counters. |  * Initialize interator that runs through all events and counters. | ||||||
|  */ |  */ | ||||||
| static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, | ||||||
| 			    int num, int wmin, int wmax) | 			    int num, int wmin, int wmax) | ||||||
| { | { | ||||||
| 	int idx; | 	int idx; | ||||||
| @ -585,10 +585,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint ** | |||||||
| 	memset(sched, 0, sizeof(*sched)); | 	memset(sched, 0, sizeof(*sched)); | ||||||
| 	sched->max_events	= num; | 	sched->max_events	= num; | ||||||
| 	sched->max_weight	= wmax; | 	sched->max_weight	= wmax; | ||||||
| 	sched->constraints	= c; | 	sched->events		= events; | ||||||
| 
 | 
 | ||||||
| 	for (idx = 0; idx < num; idx++) { | 	for (idx = 0; idx < num; idx++) { | ||||||
| 		if (c[idx]->weight == wmin) | 		if (events[idx]->hw.constraint->weight == wmin) | ||||||
| 			break; | 			break; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| @ -635,8 +635,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||||||
| 	if (sched->state.event >= sched->max_events) | 	if (sched->state.event >= sched->max_events) | ||||||
| 		return false; | 		return false; | ||||||
| 
 | 
 | ||||||
| 	c = sched->constraints[sched->state.event]; | 	c = sched->events[sched->state.event]->hw.constraint; | ||||||
| 
 |  | ||||||
| 	/* Prefer fixed purpose counters */ | 	/* Prefer fixed purpose counters */ | ||||||
| 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { | 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { | ||||||
| 		idx = INTEL_PMC_IDX_FIXED; | 		idx = INTEL_PMC_IDX_FIXED; | ||||||
| @ -694,7 +693,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) | |||||||
| 			if (sched->state.weight > sched->max_weight) | 			if (sched->state.weight > sched->max_weight) | ||||||
| 				return false; | 				return false; | ||||||
| 		} | 		} | ||||||
| 		c = sched->constraints[sched->state.event]; | 		c = sched->events[sched->state.event]->hw.constraint; | ||||||
| 	} while (c->weight != sched->state.weight); | 	} while (c->weight != sched->state.weight); | ||||||
| 
 | 
 | ||||||
| 	sched->state.counter = 0;	/* start with first counter */ | 	sched->state.counter = 0;	/* start with first counter */ | ||||||
| @ -705,12 +704,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) | |||||||
| /*
 | /*
 | ||||||
|  * Assign a counter for each event. |  * Assign a counter for each event. | ||||||
|  */ |  */ | ||||||
| int perf_assign_events(struct event_constraint **constraints, int n, | int perf_assign_events(struct perf_event **events, int n, | ||||||
| 			int wmin, int wmax, int *assign) | 			int wmin, int wmax, int *assign) | ||||||
| { | { | ||||||
| 	struct perf_sched sched; | 	struct perf_sched sched; | ||||||
| 
 | 
 | ||||||
| 	perf_sched_init(&sched, constraints, n, wmin, wmax); | 	perf_sched_init(&sched, events, n, wmin, wmax); | ||||||
| 
 | 
 | ||||||
| 	do { | 	do { | ||||||
| 		if (!perf_sched_find_counter(&sched)) | 		if (!perf_sched_find_counter(&sched)) | ||||||
| @ -724,7 +723,7 @@ int perf_assign_events(struct event_constraint **constraints, int n, | |||||||
| 
 | 
 | ||||||
| int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||||||
| { | { | ||||||
| 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 	struct event_constraint *c; | ||||||
| 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||||||
| 	int i, wmin, wmax, num = 0; | 	int i, wmin, wmax, num = 0; | ||||||
| 	struct hw_perf_event *hwc; | 	struct hw_perf_event *hwc; | ||||||
| @ -732,8 +731,10 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||||||
| 	bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 	bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { | 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { | ||||||
|  | 		hwc = &cpuc->event_list[i]->hw; | ||||||
| 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | ||||||
| 		constraints[i] = c; | 		hwc->constraint = c; | ||||||
|  | 
 | ||||||
| 		wmin = min(wmin, c->weight); | 		wmin = min(wmin, c->weight); | ||||||
| 		wmax = max(wmax, c->weight); | 		wmax = max(wmax, c->weight); | ||||||
| 	} | 	} | ||||||
| @ -743,7 +744,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||||||
| 	 */ | 	 */ | ||||||
| 	for (i = 0; i < n; i++) { | 	for (i = 0; i < n; i++) { | ||||||
| 		hwc = &cpuc->event_list[i]->hw; | 		hwc = &cpuc->event_list[i]->hw; | ||||||
| 		c = constraints[i]; | 		c = hwc->constraint; | ||||||
| 
 | 
 | ||||||
| 		/* never assigned */ | 		/* never assigned */ | ||||||
| 		if (hwc->idx == -1) | 		if (hwc->idx == -1) | ||||||
| @ -764,7 +765,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||||||
| 
 | 
 | ||||||
| 	/* slow path */ | 	/* slow path */ | ||||||
| 	if (i != n) | 	if (i != n) | ||||||
| 		num = perf_assign_events(constraints, n, wmin, wmax, assign); | 		num = perf_assign_events(cpuc->event_list, n, wmin, | ||||||
|  | 					 wmax, assign); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * scheduling failed or is just a simulation, | 	 * scheduling failed or is just a simulation, | ||||||
|  | |||||||
| @ -528,7 +528,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | |||||||
| 
 | 
 | ||||||
| void x86_pmu_enable_all(int added); | void x86_pmu_enable_all(int added); | ||||||
| 
 | 
 | ||||||
| int perf_assign_events(struct event_constraint **constraints, int n, | int perf_assign_events(struct perf_event **events, int n, | ||||||
| 			int wmin, int wmax, int *assign); | 			int wmin, int wmax, int *assign); | ||||||
| int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per | |||||||
| static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) | static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) | ||||||
| { | { | ||||||
| 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | ||||||
| 	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; | 	struct event_constraint *c; | ||||||
| 	int i, wmin, wmax, ret = 0; | 	int i, wmin, wmax, ret = 0; | ||||||
| 	struct hw_perf_event *hwc; | 	struct hw_perf_event *hwc; | ||||||
| 
 | 
 | ||||||
| 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); | 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { | 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { | ||||||
|  | 		hwc = &box->event_list[i]->hw; | ||||||
| 		c = uncore_get_event_constraint(box, box->event_list[i]); | 		c = uncore_get_event_constraint(box, box->event_list[i]); | ||||||
| 		constraints[i] = c; | 		hwc->constraint = c; | ||||||
| 		wmin = min(wmin, c->weight); | 		wmin = min(wmin, c->weight); | ||||||
| 		wmax = max(wmax, c->weight); | 		wmax = max(wmax, c->weight); | ||||||
| 	} | 	} | ||||||
| @ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int | |||||||
| 	/* fastpath, try to reuse previous register */ | 	/* fastpath, try to reuse previous register */ | ||||||
| 	for (i = 0; i < n; i++) { | 	for (i = 0; i < n; i++) { | ||||||
| 		hwc = &box->event_list[i]->hw; | 		hwc = &box->event_list[i]->hw; | ||||||
| 		c = constraints[i]; | 		c = hwc->constraint; | ||||||
| 
 | 
 | ||||||
| 		/* never assigned */ | 		/* never assigned */ | ||||||
| 		if (hwc->idx == -1) | 		if (hwc->idx == -1) | ||||||
| @ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int | |||||||
| 	} | 	} | ||||||
| 	/* slow path */ | 	/* slow path */ | ||||||
| 	if (i != n) | 	if (i != n) | ||||||
| 		ret = perf_assign_events(constraints, n, wmin, wmax, assign); | 		ret = perf_assign_events(box->event_list, n, | ||||||
|  | 					 wmin, wmax, assign); | ||||||
| 
 | 
 | ||||||
| 	if (!assign || ret) { | 	if (!assign || ret) { | ||||||
| 		for (i = 0; i < n; i++) | 		for (i = 0; i < n; i++) | ||||||
|  | |||||||
| @ -113,6 +113,8 @@ struct hw_perf_event_extra { | |||||||
| 	int		idx;	/* index in shared_regs->regs[] */ | 	int		idx;	/* index in shared_regs->regs[] */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct event_constraint; | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * struct hw_perf_event - performance event hardware details: |  * struct hw_perf_event - performance event hardware details: | ||||||
|  */ |  */ | ||||||
| @ -131,6 +133,8 @@ struct hw_perf_event { | |||||||
| 
 | 
 | ||||||
| 			struct hw_perf_event_extra extra_reg; | 			struct hw_perf_event_extra extra_reg; | ||||||
| 			struct hw_perf_event_extra branch_reg; | 			struct hw_perf_event_extra branch_reg; | ||||||
|  | 
 | ||||||
|  | 			struct event_constraint *constraint; | ||||||
| 		}; | 		}; | ||||||
| 		struct { /* software */ | 		struct { /* software */ | ||||||
| 			struct hrtimer	hrtimer; | 			struct hrtimer	hrtimer; | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Andrew Hunter
						Andrew Hunter