2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

drm/i915: Implement pwrite without struct-mutex

We only need struct_mutex within pwrite for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-17-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2016-10-28 13:58:40 +01:00
parent bb6dc8d96b
commit fe115628d5

View File

@@ -1116,72 +1116,50 @@ out:
* page faults in the source data * page faults in the source data
*/ */
static inline int static inline bool
fast_user_write(struct io_mapping *mapping, ggtt_write(struct io_mapping *mapping,
loff_t page_base, int page_offset, loff_t base, int offset,
char __user *user_data, char __user *user_data, int length)
int length)
{ {
void __iomem *vaddr_atomic;
void *vaddr; void *vaddr;
unsigned long unwritten; unsigned long unwritten;
vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
/* We can use the cpu mem copy function because this is X86. */ /* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force*)vaddr_atomic + page_offset; vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
unwritten = __copy_from_user_inatomic_nocache(vaddr, unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
user_data, length); user_data, length);
io_mapping_unmap_atomic(vaddr_atomic); io_mapping_unmap_atomic(vaddr);
return unwritten; if (unwritten) {
} vaddr = (void __force *)
io_mapping_map_wc(mapping, base, PAGE_SIZE);
unwritten = copy_from_user(vaddr + offset, user_data, length);
io_mapping_unmap(vaddr);
}
static inline unsigned long
slow_user_access(struct io_mapping *mapping,
unsigned long page_base, int page_offset,
char __user *user_data,
unsigned long length, bool pwrite)
{
void __iomem *ioaddr;
void *vaddr;
unsigned long unwritten;
ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
/* We can use the cpu mem copy function because this is X86. */
vaddr = (void __force *)ioaddr + page_offset;
if (pwrite)
unwritten = __copy_from_user(vaddr, user_data, length);
else
unwritten = __copy_to_user(user_data, vaddr, length);
io_mapping_unmap(ioaddr);
return unwritten; return unwritten;
} }
/** /**
* This is the fast pwrite path, where we copy the data directly from the * This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached. * user into the GTT, uncached.
* @i915: i915 device private data * @obj: i915 GEM object
* @obj: i915 gem object
* @args: pwrite arguments structure * @args: pwrite arguments structure
* @file: drm file pointer
*/ */
static int static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *args)
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
{ {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt; struct i915_ggtt *ggtt = &i915->ggtt;
struct drm_device *dev = obj->base.dev;
struct i915_vma *vma;
struct drm_mm_node node; struct drm_mm_node node;
uint64_t remain, offset; struct i915_vma *vma;
char __user *user_data; u64 remain, offset;
void __user *user_data;
int ret; int ret;
bool hit_slow_path = false;
if (i915_gem_object_is_tiled(obj)) ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
return -EFAULT; if (ret)
return ret;
intel_runtime_pm_get(i915); intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1198,21 +1176,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
if (ret) if (ret)
goto out; goto out_unlock;
GEM_BUG_ON(!node.allocated);
ret = i915_gem_object_pin_pages(obj);
if (ret) {
remove_mappable_node(&node);
goto out;
}
} }
ret = i915_gem_object_set_to_gtt_domain(obj, true); ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret) if (ret)
goto out_unpin; goto out_unpin;
mutex_unlock(&i915->drm.struct_mutex);
intel_fb_obj_invalidate(obj, ORIGIN_CPU); intel_fb_obj_invalidate(obj, ORIGIN_CPU);
obj->mm.dirty = true;
user_data = u64_to_user_ptr(args->data_ptr); user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset; offset = args->offset;
@@ -1243,92 +1217,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
* If the object is non-shmem backed, we retry again with the * If the object is non-shmem backed, we retry again with the
* path that handles page fault. * path that handles page fault.
*/ */
if (fast_user_write(&ggtt->mappable, page_base, if (ggtt_write(&ggtt->mappable, page_base, page_offset,
page_offset, user_data, page_length)) { user_data, page_length)) {
hit_slow_path = true;
mutex_unlock(&dev->struct_mutex);
if (slow_user_access(&ggtt->mappable,
page_base,
page_offset, user_data,
page_length, true)) {
ret = -EFAULT; ret = -EFAULT;
mutex_lock(&dev->struct_mutex); break;
goto out_flush;
}
mutex_lock(&dev->struct_mutex);
} }
remain -= page_length; remain -= page_length;
user_data += page_length; user_data += page_length;
offset += page_length; offset += page_length;
} }
out_flush:
if (hit_slow_path) {
if (ret == 0 &&
(obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
/* The user has modified the object whilst we tried
* reading from it, and we now have no idea what domain
* the pages should be in. As we have just been touching
* them directly, flush everything back to the GTT
* domain.
*/
ret = i915_gem_object_set_to_gtt_domain(obj, false);
}
}
intel_fb_obj_flush(obj, false, ORIGIN_CPU); intel_fb_obj_flush(obj, false, ORIGIN_CPU);
mutex_lock(&i915->drm.struct_mutex);
out_unpin: out_unpin:
if (node.allocated) { if (node.allocated) {
wmb(); wmb();
ggtt->base.clear_range(&ggtt->base, ggtt->base.clear_range(&ggtt->base,
node.start, node.size); node.start, node.size);
i915_gem_object_unpin_pages(obj);
remove_mappable_node(&node); remove_mappable_node(&node);
} else { } else {
i915_vma_unpin(vma); i915_vma_unpin(vma);
} }
out: out_unlock:
intel_runtime_pm_put(i915); intel_runtime_pm_put(i915);
mutex_unlock(&i915->drm.struct_mutex);
return ret; return ret;
} }
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
* writing if needs_clflush is set. */
static int static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, shmem_pwrite_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
bool needs_clflush_after)
{
char *vaddr;
int ret;
if (unlikely(page_do_bit17_swizzling))
return -EINVAL;
vaddr = kmap_atomic(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
user_data, page_length);
if (needs_clflush_after)
drm_clflush_virt_range(vaddr + shmem_page_offset,
page_length);
kunmap_atomic(vaddr);
return ret ? -EFAULT : 0;
}
/* Only difference to the fast-path function is that this can handle bit17
* and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
char __user *user_data, char __user *user_data,
bool page_do_bit17_swizzling, bool page_do_bit17_swizzling,
bool needs_clflush_before, bool needs_clflush_before,
@@ -1339,124 +1257,114 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
vaddr = kmap(page); vaddr = kmap(page);
if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
shmem_clflush_swizzled_range(vaddr + shmem_page_offset, shmem_clflush_swizzled_range(vaddr + offset, length,
page_length,
page_do_bit17_swizzling); page_do_bit17_swizzling);
if (page_do_bit17_swizzling) if (page_do_bit17_swizzling)
ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, ret = __copy_from_user_swizzled(vaddr, offset, user_data,
user_data, length);
page_length);
else else
ret = __copy_from_user(vaddr + shmem_page_offset, ret = __copy_from_user(vaddr + offset, user_data, length);
user_data,
page_length);
if (needs_clflush_after) if (needs_clflush_after)
shmem_clflush_swizzled_range(vaddr + shmem_page_offset, shmem_clflush_swizzled_range(vaddr + offset, length,
page_length,
page_do_bit17_swizzling); page_do_bit17_swizzling);
kunmap(page); kunmap(page);
return ret ? -EFAULT : 0; return ret ? -EFAULT : 0;
} }
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
* writing if needs_clflush is set.
*/
static int static int
i915_gem_shmem_pwrite(struct drm_device *dev, shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
struct drm_i915_gem_object *obj, bool page_do_bit17_swizzling,
struct drm_i915_gem_pwrite *args, bool needs_clflush_before,
struct drm_file *file) bool needs_clflush_after)
{ {
ssize_t remain; int ret;
loff_t offset;
char __user *user_data;
int shmem_page_offset, page_length, ret = 0;
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
int hit_slowpath = 0;
unsigned int needs_clflush;
struct sg_page_iter sg_iter;
ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); ret = -ENODEV;
if (!page_do_bit17_swizzling) {
char *vaddr = kmap_atomic(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + offset, len);
ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
if (needs_clflush_after)
drm_clflush_virt_range(vaddr + offset, len);
kunmap_atomic(vaddr);
}
if (ret == 0)
return ret;
return shmem_pwrite_slow(page, offset, len, user_data,
page_do_bit17_swizzling,
needs_clflush_before,
needs_clflush_after);
}
static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *args)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
void __user *user_data;
u64 remain;
unsigned int obj_do_bit17_swizzling;
unsigned int partial_cacheline_write;
unsigned int needs_clflush;
unsigned int offset, idx;
int ret;
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret) if (ret)
return ret; return ret;
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
user_data = u64_to_user_ptr(args->data_ptr); mutex_unlock(&i915->drm.struct_mutex);
offset = args->offset; if (ret)
remain = args->size; return ret;
for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents, obj_do_bit17_swizzling = 0;
offset >> PAGE_SHIFT) { if (i915_gem_object_needs_bit17_swizzle(obj))
struct page *page = sg_page_iter_page(&sg_iter); obj_do_bit17_swizzling = BIT(17);
int partial_cacheline_write;
if (remain <= 0)
break;
/* Operation in this page
*
* shmem_page_offset = offset within page in shmem file
* page_length = bytes to copy for this page
*/
shmem_page_offset = offset_in_page(offset);
page_length = remain;
if ((shmem_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - shmem_page_offset;
/* If we don't overwrite a cacheline completely we need to be /* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't * careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch. */ * overcomplicate things and flush the entire patch.
partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
((shmem_page_offset | page_length)
& (boot_cpu_data.x86_clflush_size - 1));
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
(page_to_phys(page) & (1 << 17)) != 0;
ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
if (ret == 0)
goto next_page;
hit_slowpath = 1;
mutex_unlock(&dev->struct_mutex);
ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
user_data, page_do_bit17_swizzling,
partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
mutex_lock(&dev->struct_mutex);
if (ret)
goto out;
next_page:
remain -= page_length;
user_data += page_length;
offset += page_length;
}
out:
i915_gem_obj_finish_shmem_access(obj);
if (hit_slowpath) {
/*
* Fixup: Flush cpu caches in case we didn't flush the dirty
* cachelines in-line while writing and the object moved
* out of the cpu write domain while we've dropped the lock.
*/ */
if (!(needs_clflush & CLFLUSH_AFTER) && partial_cacheline_write = 0;
obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (needs_clflush & CLFLUSH_BEFORE)
if (i915_gem_clflush_object(obj, obj->pin_display)) partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
needs_clflush |= CLFLUSH_AFTER;
}
}
if (needs_clflush & CLFLUSH_AFTER) user_data = u64_to_user_ptr(args->data_ptr);
i915_gem_chipset_flush(to_i915(dev)); remain = args->size;
offset = offset_in_page(args->offset);
for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
struct page *page = i915_gem_object_get_page(obj, idx);
int length;
length = remain;
if (offset + length > PAGE_SIZE)
length = PAGE_SIZE - offset;
ret = shmem_pwrite(page, offset, length, user_data,
page_to_phys(page) & obj_do_bit17_swizzling,
(offset | length) & partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
if (ret)
break;
remain -= length;
user_data += length;
offset = 0;
}
intel_fb_obj_flush(obj, false, ORIGIN_CPU); intel_fb_obj_flush(obj, false, ORIGIN_CPU);
i915_gem_obj_finish_shmem_access(obj);
return ret; return ret;
} }
@@ -1472,7 +1380,6 @@ int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_file *file) struct drm_file *file)
{ {
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_pwrite *args = data;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
int ret; int ret;
@@ -1485,13 +1392,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
args->size)) args->size))
return -EFAULT; return -EFAULT;
if (likely(!i915.prefault_disable)) {
ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
args->size);
if (ret)
return -EFAULT;
}
obj = i915_gem_object_lookup(file, args->handle); obj = i915_gem_object_lookup(file, args->handle);
if (!obj) if (!obj)
return -ENOENT; return -ENOENT;
@@ -1513,11 +1413,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret) if (ret)
goto err; goto err;
intel_runtime_pm_get(dev_priv); ret = i915_gem_object_pin_pages(obj);
ret = i915_mutex_lock_interruptible(dev);
if (ret) if (ret)
goto err_rpm; goto err;
ret = -EFAULT; ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise /* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -1532,23 +1430,16 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* pointers (e.g. gtt mappings when moving data between * pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case. * textures). Fallback to the shmem path in that case.
*/ */
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); ret = i915_gem_gtt_pwrite_fast(obj, args);
if (ret == -EFAULT || ret == -ENOSPC) { if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle) if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file); ret = i915_gem_phys_pwrite(obj, args, file);
else else
ret = i915_gem_shmem_pwrite(dev, obj, args, file); ret = i915_gem_shmem_pwrite(obj, args);
} }
i915_gem_object_put(obj); i915_gem_object_unpin_pages(obj);
mutex_unlock(&dev->struct_mutex);
intel_runtime_pm_put(dev_priv);
return ret;
err_rpm:
intel_runtime_pm_put(dev_priv);
err: err:
i915_gem_object_put_unlocked(obj); i915_gem_object_put_unlocked(obj);
return ret; return ret;