Index: sys/arch/amd64/amd64/trap.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/trap.c,v retrieving revision 1.87 diff -u -p -u -p -r1.87 trap.c --- sys/arch/amd64/amd64/trap.c 22 Oct 2020 13:41:51 -0000 1.87 +++ sys/arch/amd64/amd64/trap.c 22 Dec 2020 14:38:09 -0000 @@ -176,10 +176,7 @@ upageflttrap(struct trapframe *frame, ui union sigval sv; int signal, sicode, error; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); - KERNEL_UNLOCK(); - if (error == 0) { uvm_grow(p, va); return 1; @@ -261,9 +258,7 @@ kpageflttrap(struct trapframe *frame, ui if (curcpu()->ci_inatomic == 0 || map == kernel_map) { onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; - KERNEL_LOCK(); error = uvm_fault(map, va, 0, access_type); - KERNEL_UNLOCK(); pcb->pcb_onfault = onfault; if (error == 0 && map != kernel_map) Index: sys/arch/amd64/conf/GENERIC.MP =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/conf/GENERIC.MP,v retrieving revision 1.14 diff -u -p -u -p -r1.14 GENERIC.MP --- sys/arch/amd64/conf/GENERIC.MP 13 Jul 2018 05:25:24 -0000 1.14 +++ sys/arch/amd64/conf/GENERIC.MP 22 Dec 2020 14:38:09 -0000 @@ -4,6 +4,6 @@ include "arch/amd64/conf/GENERIC" option MULTIPROCESSOR #option MP_LOCKDEBUG -#option WITNESS +option WITNESS cpu* at mainbus? Index: sys/arch/sparc64/sparc64/trap.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/sparc64/sparc64/trap.c,v retrieving revision 1.107 diff -u -p -u -p -r1.107 trap.c --- sys/arch/sparc64/sparc64/trap.c 23 Oct 2020 16:54:35 -0000 1.107 +++ sys/arch/sparc64/sparc64/trap.c 22 Dec 2020 14:38:09 -0000 @@ -773,10 +773,7 @@ data_access_fault(struct trapframe64 *tf if (!(addr & TLB_TAG_ACCESS_CTX)) { /* CTXT == NUCLEUS */ - KERNEL_LOCK(); error = uvm_fault(kernel_map, va, 0, access_type); - KERNEL_UNLOCK(); - if (error == 0) return; goto kfault; @@ -792,9 +789,7 @@ data_access_fault(struct trapframe64 *tf onfault = (vaddr_t)p->p_addr->u_pcb.pcb_onfault; p->p_addr->u_pcb.pcb_onfault = NULL; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, (vaddr_t)va, 0, access_type); - KERNEL_UNLOCK(); p->p_addr->u_pcb.pcb_onfault = (void *)onfault; /* @@ -959,9 +954,7 @@ text_access_fault(struct trapframe64 *tf uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) goto out; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); - KERNEL_UNLOCK(); /* * If this was a stack access we keep track of the maximum @@ -1055,9 +1048,7 @@ text_access_error(struct trapframe64 *tf uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) goto out; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); - KERNEL_UNLOCK(); /* * If this was a stack access we keep track of the maximum Index: sys/kern/init_main.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/init_main.c,v retrieving revision 1.302 diff -u -p -u -p -r1.302 init_main.c --- sys/kern/init_main.c 7 Dec 2020 16:55:28 -0000 1.302 +++ sys/kern/init_main.c 22 Dec 2020 14:38:09 -0000 @@ -232,6 +232,7 @@ main(void *framep) KERNEL_LOCK_INIT(); SCHED_LOCK_INIT(); + rw_obj_init(); uvm_init(); disk_init(); /* must come before autoconfiguration */ tty_init(); /* initialise tty's */ @@ -432,6 +433,7 @@ main(void *framep) #endif mbcpuinit(); /* enable per cpu mbuf data */ + uvm_init_percpu(); /* init exec and emul */ 
init_exec(); Index: sys/kern/kern_rwlock.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.45 diff -u -p -u -p -r1.45 kern_rwlock.c --- sys/kern/kern_rwlock.c 2 Mar 2020 17:07:49 -0000 1.45 +++ sys/kern/kern_rwlock.c 22 Dec 2020 14:38:09 -0000 @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -487,4 +488,135 @@ int rrw_status(struct rrwlock *rrwl) { return (rw_status(&rrwl->rrwl_lock)); +} + +/*- + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define RWLOCK_OBJ_MAGIC 0x5aa3c85d +struct rwlock_obj { + struct rwlock ro_lock; + u_int ro_magic; + u_int ro_refcnt; +}; + + +struct pool rwlock_obj_pool; + +/* + * rw_obj_init: + * + * Initialize the mutex object store. + */ +void +rw_obj_init(void) +{ + /* XXX PR_RWLOCK because of: + * + * witness_checkorder() at witness_checkorder+0xb42 + * __mp_lock() at __mp_lock+0x62 + * softintr_dispatch(0) at softintr_dispatch+0x4a + * Xsoftclock() at Xsoftclock+0x1f + * pool_lock_mtx_assert_locked() at pool_lock_mtx_assert_locked + * pool_get() at pool_get+0x7f + * _rw_obj_alloc_flags() at _rw_obj_alloc_flags+0x3b + * amap_alloc(1000,2,0) at amap_alloc+0x55 + * amap_copy() at amap_copy+0x339 + * uvm_fault_check() at uvm_fault_check+0x2a2 + */ + pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_NONE, + PR_WAITOK | PR_RWLOCK, "rwobjpl", NULL); +} + +/* + * rw_obj_alloc: + * + * Allocate a single lock object. + */ +void +_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags, + struct lock_type *type) +{ + struct rwlock_obj *mo; + + mo = pool_get(&rwlock_obj_pool, PR_WAITOK); + mo->ro_magic = RWLOCK_OBJ_MAGIC; + _rw_init_flags(&mo->ro_lock, name, flags, type); + mo->ro_refcnt = 1; + + *lock = &mo->ro_lock; +} + +/* + * rw_obj_hold: + * + * Add a single reference to a lock object. A reference to the object + * must already be held, and must be held across this call. 
+ */ + +void +rw_obj_hold(struct rwlock *lock) +{ + struct rwlock_obj *mo = (struct rwlock_obj *)lock; + + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC, + "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)", + __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC); + KASSERTMSG(mo->ro_refcnt > 0, + "%s: lock %p: mo->ro_refcnt (%#x) == 0", + __func__, mo, mo->ro_refcnt); + + atomic_inc_int(&mo->ro_refcnt); +} + +/* + * rw_obj_free: + * + * Drop a reference from a lock object. If the last reference is being + * dropped, free the object and return true. Otherwise, return false. + */ +int +rw_obj_free(struct rwlock *lock) +{ + struct rwlock_obj *mo = (struct rwlock_obj *)lock; + + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC, + "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)", + __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC); + KASSERTMSG(mo->ro_refcnt > 0, + "%s: lock %p: mo->ro_refcnt (%#x) == 0", + __func__, mo, mo->ro_refcnt); + + if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) { + return false; + } + //WITNESS_DESTROY(&mo->ro_lock); + pool_put(&rwlock_obj_pool, mo); + return true; } Index: sys/kern/subr_pool.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.230 diff -u -p -u -p -r1.230 subr_pool.c --- sys/kern/subr_pool.c 24 Jan 2020 06:31:17 -0000 1.230 +++ sys/kern/subr_pool.c 22 Dec 2020 14:38:09 -0000 @@ -563,8 +563,6 @@ pool_get(struct pool *pp, int flags) int slowdown = 0; KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); - if (pp->pr_flags & PR_RWLOCK) - KASSERT(flags & PR_WAITOK); #ifdef MULTIPROCESSOR if (pp->pr_cache != NULL) { Index: sys/sys/rwlock.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/sys/rwlock.h,v retrieving revision 1.27 diff -u -p -u -p -r1.27 rwlock.h --- sys/sys/rwlock.h 15 Dec 2020 10:23:01 -0000 1.27 +++ sys/sys/rwlock.h 22 Dec 2020 14:38:09 -0000 @@ -209,6 +209,28 @@ int rrw_status(struct rrwlock *); #define rrw_init(rrwl, name) _rrw_init_flags(rrwl, name, 0, NULL) #endif /* WITNESS */ + +/* + * Allocated, reference-counted rwlocks + */ + +#ifdef WITNESS +#define rw_obj_alloc_flags(rwl, name, flags) do { \ + static struct lock_type __lock_type = { .lt_name = #rwl }; \ + _rw_obj_alloc_flags(rwl, name, flags, &__lock_type); \ +} while (0) +#else +#define rw_obj_alloc_flags(rwl, name, flags) \ + _rw_obj_alloc_flags(rwl, name, flags, NULL) +#endif +#define rw_obj_alloc(rwl, name) rw_obj_alloc_flags(rwl, name, 0) + +void rw_obj_init(void); +void _rw_obj_alloc_flags(struct rwlock **, const char *, int, + struct lock_type *); +void rw_obj_hold(struct rwlock *); +int rw_obj_free(struct rwlock *); + #endif /* _KERNEL */ #endif /* _SYS_RWLOCK_H */ Index: sys/uvm/uvm_amap.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_amap.c,v retrieving revision 1.86 diff -u -p -u -p -r1.86 uvm_amap.c --- sys/uvm/uvm_amap.c 13 Nov 2020 11:11:48 -0000 1.86 +++ sys/uvm/uvm_amap.c 22 Dec 2020 14:38:09 -0000 @@ -55,6 +55,9 @@ struct pool uvm_small_amap_pool[UVM_AMAP struct pool uvm_amap_chunk_pool; LIST_HEAD(, vm_amap) amap_list; +struct rwlock amap_list_lock = RWLOCK_INITIALIZER("amaplstlk"); +#define amap_lock_list() rw_enter_write(&amap_list_lock) +#define amap_unlock_list() rw_exit_write(&amap_list_lock) static char amap_small_pool_names[UVM_AMAP_CHUNK][9]; @@ -89,13 +92,17 @@ void amap_wiperange(struct vm_amap *, in static inline void amap_list_insert(struct vm_amap 
*amap) { + amap_lock_list(); LIST_INSERT_HEAD(&amap_list, amap, am_list); + amap_unlock_list(); } static inline void amap_list_remove(struct vm_amap *amap) -{ +{ + amap_lock_list(); LIST_REMOVE(amap, am_list); + amap_unlock_list(); } /* @@ -249,7 +256,7 @@ amap_init(void) /* Initialize the vm_amap pool. */ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), - 0, IPL_NONE, PR_WAITOK, "amappl", NULL); + 0, IPL_NONE, PR_WAITOK | PR_RWLOCK, "amappl", NULL); pool_sethiwat(&uvm_amap_pool, 4096); /* initialize small amap pools */ @@ -258,13 +265,13 @@ amap_init(void) sizeof(amap_small_pool_names[0]), "amappl%d", i + 1); size = offsetof(struct vm_amap, am_small.ac_anon) + (i + 1) * sizeof(struct vm_anon *); - pool_init(&uvm_small_amap_pool[i], size, 0, - IPL_NONE, 0, amap_small_pool_names[i], NULL); + pool_init(&uvm_small_amap_pool[i], size, 0, IPL_NONE, + PR_WAITOK | PR_RWLOCK, amap_small_pool_names[i], NULL); } pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) + UVM_AMAP_CHUNK * sizeof(struct vm_anon *), - 0, IPL_NONE, 0, "amapchunkpl", NULL); + 0, IPL_NONE, PR_WAITOK | PR_RWLOCK, "amapchunkpl", NULL); pool_sethiwat(&uvm_amap_chunk_pool, 4096); } @@ -332,6 +339,7 @@ amap_alloc1(int slots, int waitf, int la if (amap == NULL) return(NULL); + amap->am_lock = NULL; amap->am_ref = 1; amap->am_flags = 0; #ifdef UVM_AMAP_PPREF @@ -389,6 +397,12 @@ fail1: return (NULL); } +static void +amap_lock_alloc(struct vm_amap *amap) +{ + rw_obj_alloc(&amap->am_lock, "amaplk"); +} + /* * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM * @@ -406,8 +420,10 @@ amap_alloc(vaddr_t sz, int waitf, int la return (NULL); amap = amap_alloc1(slots, waitf, lazyalloc); - if (amap) + if (amap != NULL) { + amap_lock_alloc(amap); amap_list_insert(amap); + } return(amap); } @@ -426,6 +442,11 @@ amap_free(struct vm_amap *amap) KASSERT(amap->am_ref == 0 && amap->am_nused == 0); KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0); + if (amap->am_lock != NULL) { + KASSERT(amap->am_lock == NULL || !rw_write_held(amap->am_lock)); + rw_obj_free(amap->am_lock); + } + #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) free(amap->am_ppref, M_UVMAMAP, amap->am_nslot * sizeof(int)); @@ -447,6 +468,7 @@ amap_free(struct vm_amap *amap) * * => called from amap_unref when the final reference to an amap is * discarded (i.e. when reference count == 1) + * => amap must be locked. */ void @@ -457,15 +479,16 @@ amap_wipeout(struct vm_amap *amap) struct vm_amap_chunk *chunk; struct pglist pgl; + KASSERT(rw_write_held(amap->am_lock)); KASSERT(amap->am_ref == 0); if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) { /* amap_swap_off will call us again. */ + amap_unlock(amap); return; } TAILQ_INIT(&pgl); - amap_list_remove(amap); AMAP_CHUNK_FOREACH(chunk, amap) { @@ -478,6 +501,7 @@ amap_wipeout(struct vm_amap *amap) if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); + KASSERT(anon->an_lock == amap->am_lock); refs = --anon->an_ref; if (refs == 0) { @@ -495,7 +519,8 @@ amap_wipeout(struct vm_amap *amap) /* now we free the map */ amap->am_ref = 0; /* ... was one */ amap->am_nused = 0; - amap_free(amap); /* will free amap */ + amap_unlock(amap); + amap_free(amap); } /* @@ -503,6 +528,8 @@ amap_wipeout(struct vm_amap *amap) * by copying the amap if necessary. * * => an entry with a null amap pointer will get a new (blank) one. + * => the map that the map entry blocks to must be locked by caller. + * => the amap (if any) currently attached to the entry must be unlocked. 
* => if canchunk is true, then we may clip the entry into a chunk * => "startva" and "endva" are used only if canchunk is true. they are * used to limit chunking (e.g. if you have a large space that you @@ -519,6 +546,9 @@ amap_copy(struct vm_map *map, struct vm_ vaddr_t chunksize; int i, j, k, n, srcslot; struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL; + struct vm_anon *anon; + + KASSERT(map != kernel_map); /* we use sleeping locks */ /* is there a map to copy? if not, create one from scratch. */ if (entry->aref.ar_amap == NULL) { @@ -574,6 +604,8 @@ amap_copy(struct vm_map *map, struct vm_ return; srcamap = entry->aref.ar_amap; + amap_lock(srcamap); + /* * need to double check reference count now. the reference count * could have changed while we were in malloc. if the reference count @@ -582,6 +614,7 @@ amap_copy(struct vm_map *map, struct vm_ */ if (srcamap->am_ref == 1) { /* take it over? */ entry->etype &= ~UVM_ET_NEEDSCOPY; + amap_unlock(srcamap); amap->am_ref--; /* drop final reference to map */ amap_free(amap); /* dispose of new (unused) amap */ return; @@ -606,18 +639,21 @@ amap_copy(struct vm_map *map, struct vm_ chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT); if (chunk == NULL) { + amap_unlock(srcamap); amap->am_ref = 0; amap_wipeout(amap); return; } for (k = 0; k < n; i++, j++, k++) { - chunk->ac_anon[i] = srcchunk->ac_anon[j]; - if (chunk->ac_anon[i] == NULL) + chunk->ac_anon[i] = anon = srcchunk->ac_anon[j]; + if (anon == NULL) continue; + KASSERT(anon->an_lock == srcamap->am_lock); + KASSERT(anon->an_ref > 0); chunk->ac_usedmap |= (1 << i); - chunk->ac_anon[i]->an_ref++; + anon->an_ref++; amap->am_nused++; } } @@ -629,6 +665,8 @@ amap_copy(struct vm_map *map, struct vm_ * the count to zero. [and no need to worry about freeing it] */ srcamap->am_ref--; + KASSERT(srcamap->am_ref > 0); + if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ #ifdef UVM_AMAP_PPREF @@ -638,6 +676,20 @@ amap_copy(struct vm_map *map, struct vm_ } #endif + /* + * If we referenced any anons, then share the source amap's lock. + * Otherwise, we have nothing in common, so allocate a new one. + */ + KASSERT(amap->am_lock == NULL); + if (amap->am_nused != 0) { + amap->am_lock = srcamap->am_lock; + rw_obj_hold(amap->am_lock); + } + amap_unlock(srcamap); + + if (amap->am_lock == NULL) + amap_lock_alloc(amap); + /* install new amap. */ entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap; @@ -655,6 +707,7 @@ amap_copy(struct vm_map *map, struct vm_ * so we resolve the COW here. * * => assume parent's entry was wired, thus all pages are resident. + * => the parent and child vm_map must both be locked. * => caller passes child's map/entry in to us * => XXXCDC: out of memory should cause fork to fail, but there is * currently no easy way to do this (needs fix) @@ -675,6 +728,7 @@ amap_cow_now(struct vm_map *map, struct * am_anon[] array on us. 
*/ ReStart: + amap_lock(amap); AMAP_CHUNK_FOREACH(chunk, amap) { int i, map = chunk->ac_usedmap; @@ -683,6 +737,7 @@ ReStart: map ^= 1 << slot; anon = chunk->ac_anon[slot]; pg = anon->an_page; + KASSERT(anon->an_lock == amap->am_lock); /* page must be resident since parent is wired */ KASSERT(pg != NULL); @@ -700,24 +755,27 @@ ReStart: */ if (pg->pg_flags & PG_BUSY) { atomic_setbits_int(&pg->pg_flags, PG_WANTED); - tsleep_nsec(pg, PVM, "cownow", INFSLP); + rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK, + "cownow", INFSLP); goto ReStart; } /* ok, time to do a copy-on-write to a new anon */ nanon = uvm_analloc(); - if (nanon) { + if (nanon != NULL) { + /* the new anon will share the amap's lock */ + nanon->an_lock = amap->am_lock; npg = uvm_pagealloc(NULL, 0, nanon, 0); } else npg = NULL; /* XXX: quiet gcc warning */ if (nanon == NULL || npg == NULL) { /* out of memory */ - /* - * XXXCDC: we should cause fork to fail, but - * we can't ... - */ - if (nanon) { + amap_unlock(amap); + if (nanon != NULL) { + nanon->an_lock = NULL; + nanon->an_ref--; + KASSERT(nanon->an_ref == 0); uvm_anfree(nanon); } uvm_wait("cownowpage"); @@ -730,6 +788,7 @@ ReStart: */ uvm_pagecopy(pg, npg); /* old -> new */ anon->an_ref--; /* can't drop to zero */ + KASSERT(anon->an_ref > 0); chunk->ac_anon[slot] = nanon; /* replace */ /* @@ -744,6 +803,7 @@ ReStart: uvm_unlock_pageq(); } } + amap_unlock(amap); } /* @@ -757,10 +817,13 @@ amap_splitref(struct vm_aref *origref, s struct vm_amap *amap = origref->ar_amap; int leftslots; + KASSERT(splitref->ar_amap == amap); AMAP_B2SLOT(leftslots, offset); if (leftslots == 0) panic("amap_splitref: split at zero offset"); + amap_lock(amap); + /* now: we have a valid am_mapped array. */ if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0) panic("amap_splitref: map size check failed"); @@ -775,6 +838,7 @@ amap_splitref(struct vm_aref *origref, s amap->am_ref++; splitref->ar_amap = amap; splitref->ar_pageoff = origref->ar_pageoff + leftslots; + amap_unlock(amap); } #ifdef UVM_AMAP_PPREF @@ -786,6 +850,7 @@ void amap_pp_establish(struct vm_amap *amap) { + KASSERT(rw_write_held(amap->am_lock)); amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int), M_UVMAMAP, M_NOWAIT|M_ZERO); @@ -811,6 +876,8 @@ amap_pp_adjref(struct vm_amap *amap, int int stopslot, *ppref, lcv, prevlcv; int ref, len, prevref, prevlen; + KASSERT(rw_write_held(amap->am_lock)); + stopslot = curslot + slotlen; ppref = amap->am_ppref; prevlcv = 0; @@ -893,6 +960,7 @@ amap_wiperange_chunk(struct vm_amap *ama map ^= 1 << curslot; chunk->ac_usedmap ^= 1 << curslot; anon = chunk->ac_anon[curslot]; + KASSERT(anon->an_lock == amap->am_lock); /* remove it from the amap */ chunk->ac_anon[curslot] = NULL; @@ -902,10 +970,6 @@ amap_wiperange_chunk(struct vm_amap *ama /* drop anon reference count */ refs = --anon->an_ref; if (refs == 0) { - /* - * we just eliminated the last reference to an - * anon. free it. 
- */ uvm_anfree(anon); } } @@ -921,6 +985,8 @@ amap_wiperange(struct vm_amap *amap, int int bucket, startbucket, endbucket; struct vm_amap_chunk *chunk, *nchunk; + KASSERT(rw_write_held(amap->am_lock)); + startbucket = UVM_AMAP_BUCKET(amap, slotoff); endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1); @@ -980,12 +1046,24 @@ amap_swap_off(int startslot, int endslot { struct vm_amap *am; struct vm_amap *am_next; + struct vm_amap marker; boolean_t rv = FALSE; + amap_lock_list(); for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) { int i, map; struct vm_amap_chunk *chunk; + amap_lock(am); + if (am->am_nused == 0) { + amap_unlock(am); + am_next = LIST_NEXT(am, am_list); + continue; + } + + LIST_INSERT_AFTER(am, &marker, am_list); + amap_unlock_list(); + again: AMAP_CHUNK_FOREACH(chunk, am) { map = chunk->ac_usedmap; @@ -1005,20 +1083,28 @@ again: am->am_flags |= AMAP_SWAPOFF; - rv = uvm_anon_pagein(anon); + rv = uvm_anon_pagein(am, anon); + amap_lock(am); am->am_flags &= ~AMAP_SWAPOFF; - if (rv || amap_refs(am) == 0) + if (amap_refs(am) == 0) { + amap_wipeout(am); + am = NULL; + goto nextamap; + } + if (rv) goto nextamap; goto again; } } - nextamap: - am_next = LIST_NEXT(am, am_list); - if (amap_refs(am) == 0) - amap_wipeout(am); + if (am != NULL) + amap_unlock(am); + amap_lock_list(); + am_next = LIST_NEXT(&marker, am_list); + LIST_REMOVE(&marker, am_list); } + amap_unlock_list(); return rv; } @@ -1147,9 +1233,11 @@ amap_add(struct vm_aref *aref, vaddr_t o void amap_unadd(struct vm_aref *aref, vaddr_t offset) { - int slot; struct vm_amap *amap = aref->ar_amap; struct vm_amap_chunk *chunk; + int slot; + + KASSERT(rw_write_held(amap->am_lock)); AMAP_B2SLOT(slot, offset); slot += aref->ar_pageoff; @@ -1176,6 +1264,12 @@ amap_adjref_anons(struct vm_amap *amap, int refv, boolean_t all) { #ifdef UVM_AMAP_PPREF + KASSERT(rw_write_held(amap->am_lock)); + + /* + * We must establish the ppref array before changing am_ref + * so that the ppref values match the current amap refcount. + */ if (amap->am_ppref == NULL && !all && len != amap->am_nslot) { amap_pp_establish(amap); } @@ -1192,32 +1286,37 @@ amap_adjref_anons(struct vm_amap *amap, } } #endif + amap_unlock(amap); } /* - * amap_ref: gain a reference to an amap + * amap_ref: gain a reference to an amap. * - * => "offset" and "len" are in units of pages - * => called at fork time to gain the child's reference + * => amap must not be locked (we will lock). + * => "offset" and "len" are in units of pages. + * => Called at fork time to gain the child's reference. */ void amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags) { - + amap_lock(amap); if (flags & AMAP_SHARED) amap->am_flags |= AMAP_SHARED; amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0); } /* - * amap_unref: remove a reference to an amap + * amap_unref: remove a reference to an amap. * * => All pmap-level references to this amap must be already removed. * => Called from uvm_unmap_detach(); entry is already removed from the map. + * => We will lock amap, so it must be unlocked. 
*/ void amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all) { + amap_lock(amap); + KASSERT(amap->am_ref > 0); if (amap->am_ref == 1) { Index: sys/uvm/uvm_amap.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_amap.h,v retrieving revision 1.32 diff -u -p -u -p -r1.32 uvm_amap.h --- sys/uvm/uvm_amap.h 13 Nov 2020 11:11:49 -0000 1.32 +++ sys/uvm/uvm_amap.h 22 Dec 2020 14:38:09 -0000 @@ -133,6 +133,7 @@ struct vm_amap_chunk { }; struct vm_amap { + struct rwlock *am_lock; /* lock for all vm_amap flags */ int am_ref; /* reference count */ int am_flags; /* flags */ int am_nslot; /* # of slots currently in map */ @@ -260,6 +261,9 @@ struct vm_amap { #define amap_flags(AMAP) ((AMAP)->am_flags) #define amap_refs(AMAP) ((AMAP)->am_ref) + +#define amap_lock(AMAP) rw_enter_write((AMAP)->am_lock) +#define amap_unlock(AMAP) rw_exit_write((AMAP)->am_lock) #endif /* _KERNEL */ Index: sys/uvm/uvm_anon.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_anon.c,v retrieving revision 1.50 diff -u -p -u -p -r1.50 uvm_anon.c --- sys/uvm/uvm_anon.c 24 Nov 2020 13:49:09 -0000 1.50 +++ sys/uvm/uvm_anon.c 22 Dec 2020 14:38:09 -0000 @@ -49,7 +49,7 @@ void uvm_anon_init(void) { pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_NONE, - PR_WAITOK, "anonpl", NULL); + PR_WAITOK | PR_RWLOCK, "anonpl", NULL); pool_sethiwat(&uvm_anon_pool, uvmexp.free / 16); } @@ -63,6 +63,7 @@ uvm_analloc(void) anon = pool_get(&uvm_anon_pool, PR_NOWAIT); if (anon) { + anon->an_lock = NULL; anon->an_ref = 1; anon->an_page = NULL; anon->an_swslot = 0; @@ -71,25 +72,26 @@ uvm_analloc(void) } /* - * uvm_anfree: free a single anon structure + * uvm_anfree_list: free a single anon structure * - * => caller must remove anon from its amap before calling (if it was in - * an amap). + * => anon must be removed from the amap (if anon was in an amap). + * => amap must be locked, if anon was owned by amap. * => we may lock the pageq's. */ void uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) { - struct vm_page *pg; + struct vm_page *pg = anon->an_page; - /* get page */ - pg = anon->an_page; + KASSERT(anon->an_lock == NULL || rw_write_held(anon->an_lock)); + KASSERT(anon->an_ref == 0); /* - * if we have a resident page, we must dispose of it before freeing - * the anon. + * Dispose of the page, if it is resident. */ - if (pg) { + if (pg != NULL) { + KASSERT(anon->an_lock != NULL); + /* * if page is busy then we just mark it as released (who ever * has it busy must check for this when they wake up). if the @@ -98,6 +100,7 @@ uvm_anfree_list(struct vm_anon *anon, st if ((pg->pg_flags & PG_BUSY) != 0) { /* tell them to dump it when done */ atomic_setbits_int(&pg->pg_flags, PG_RELEASED); + rw_obj_hold(anon->an_lock); return; } pmap_page_protect(pg, PROT_NONE); @@ -115,12 +118,14 @@ uvm_anfree_list(struct vm_anon *anon, st uvm_pagefree(pg); /* bye bye */ uvm_unlock_pageq(); /* free the daemon */ } + } else { + if (anon->an_swslot != 0) { + /* this page is no longer only in swap. */ + KASSERT(uvmexp.swpgonly > 0); + uvmexp.swpgonly--; + } } - if (pg == NULL && anon->an_swslot != 0) { - /* this page is no longer only in swap. */ - KASSERT(uvmexp.swpgonly > 0); - uvmexp.swpgonly--; - } + anon->an_lock = NULL; /* free any swap resources. 
*/ uvm_anon_dropswap(anon); @@ -135,12 +140,6 @@ uvm_anfree_list(struct vm_anon *anon, st pool_put(&uvm_anon_pool, anon); } -void -uvm_anfree(struct vm_anon *anon) -{ - uvm_anfree_list(anon, NULL); -} - /* * uvm_anwait: wait for memory to become available to allocate an anon. */ @@ -155,35 +154,25 @@ uvm_anwait(void) } /* - * uvm_anon_dropswap: release any swap resources from this anon. - */ -void -uvm_anon_dropswap(struct vm_anon *anon) -{ - - if (anon->an_swslot == 0) - return; - - uvm_swap_free(anon->an_swslot, 1); - anon->an_swslot = 0; -} - -/* * fetch an anon's page. * * => returns TRUE if pagein was aborted due to lack of memory. */ boolean_t -uvm_anon_pagein(struct vm_anon *anon) +uvm_anon_pagein(struct vm_amap *amap, struct vm_anon *anon) { struct vm_page *pg; int rv; - rv = uvmfault_anonget(NULL, NULL, anon); + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + + rv = uvmfault_anonget(NULL, amap, anon); switch (rv) { case VM_PAGER_OK: + KASSERT(rw_write_held(anon->an_lock)); break; case VM_PAGER_ERROR: case VM_PAGER_REFAULT: @@ -206,7 +195,9 @@ uvm_anon_pagein(struct vm_anon *anon) * mark it as dirty, clear its swslot and un-busy it. */ pg = anon->an_page; - uvm_swap_free(anon->an_swslot, 1); + if (anon->an_swslot > 0) { + uvm_swap_free(anon->an_swslot, 1); + } anon->an_swslot = 0; atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); @@ -216,6 +207,57 @@ uvm_anon_pagein(struct vm_anon *anon) uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); + rw_exit(anon->an_lock); return FALSE; +} + +/* + * uvm_anon_dropswap: release any swap resources from this anon. + * + * => anon must be locked or have a reference count of 0. + */ +void +uvm_anon_dropswap(struct vm_anon *anon) +{ + KASSERT(anon->an_ref == 0 || rw_lock_held(anon->an_lock)); + + if (anon->an_swslot == 0) + return; + + uvm_swap_free(anon->an_swslot, 1); + anon->an_swslot = 0; +} + + +/* + * uvm_anon_release: release an anon and its page. + * + * => anon should not have any references. + * => anon must be locked. + */ + +void +uvm_anon_release(struct vm_anon *anon) +{ + struct vm_page *pg = anon->an_page; + struct rwlock *lock; + + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(pg != NULL); + KASSERT((pg->pg_flags & PG_RELEASED) != 0); + KASSERT((pg->pg_flags & PG_BUSY) != 0); + KASSERT(pg->uobject == NULL); + KASSERT(pg->uanon == anon); + KASSERT(anon->an_ref == 0); + + uvm_lock_pageq(); + uvm_pagefree(pg); + uvm_unlock_pageq(); + KASSERT(anon->an_page == NULL); + lock = anon->an_lock; + uvm_anfree(anon); + rw_exit(lock); + /* Note: extra reference is held for PG_RELEASED case. 
*/ + rw_obj_free(lock); } Index: sys/uvm/uvm_anon.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_anon.h,v retrieving revision 1.21 diff -u -p -u -p -r1.21 uvm_anon.h --- sys/uvm/uvm_anon.h 4 Jan 2020 16:17:29 -0000 1.21 +++ sys/uvm/uvm_anon.h 22 Dec 2020 14:38:09 -0000 @@ -38,6 +38,8 @@ */ struct vm_anon { + struct rwlock *an_lock; + struct vm_page *an_page; /* if in RAM */ int an_ref; /* reference count */ @@ -78,12 +80,15 @@ struct vm_aref { #ifdef _KERNEL struct vm_anon *uvm_analloc(void); -void uvm_anfree(struct vm_anon *); -void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anon_release(struct vm_anon *); void uvm_anwait(void); void uvm_anon_init(void); void uvm_anon_dropswap(struct vm_anon *); -boolean_t uvm_anon_pagein(struct vm_anon *); +boolean_t uvm_anon_pagein(struct vm_amap *, struct vm_anon *); + +#define uvm_anfree(an) uvm_anfree_list((an), NULL) + #endif /* _KERNEL */ #endif /* _UVM_UVM_ANON_H_ */ Index: sys/uvm/uvm_aobj.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_aobj.c,v retrieving revision 1.89 diff -u -p -u -p -r1.89 uvm_aobj.c --- sys/uvm/uvm_aobj.c 21 Oct 2020 09:08:14 -0000 1.89 +++ sys/uvm/uvm_aobj.c 22 Dec 2020 14:38:10 -0000 @@ -288,6 +288,8 @@ uao_set_swslot(struct uvm_object *uobj, struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int oldslot; + KERNEL_ASSERT_LOCKED(); + /* if noswap flag is set, then we can't set a slot */ if (aobj->u_flags & UAO_FLAG_NOSWAP) { if (slot == 0) Index: sys/uvm/uvm_extern.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.155 diff -u -p -u -p -r1.155 uvm_extern.h --- sys/uvm/uvm_extern.h 1 Dec 2020 13:56:22 -0000 1.155 +++ sys/uvm/uvm_extern.h 22 Dec 2020 14:38:10 -0000 @@ -289,6 +289,7 @@ void uvm_vsunlock_device(struct proc * void *); void uvm_pause(void); void uvm_init(void); +void uvm_init_percpu(void); int uvm_io(vm_map_t, struct uio *, int); #define UVM_IO_FIXPROT 0x01 Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.109 diff -u -p -u -p -r1.109 uvm_fault.c --- sys/uvm/uvm_fault.c 8 Dec 2020 12:26:31 -0000 1.109 +++ sys/uvm/uvm_fault.c 22 Dec 2020 14:38:10 -0000 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -135,8 +136,7 @@ * by multiple map entries, and figuring out what should wait could be * complex as well...). * - * given that we are not currently multiprocessor or multithreaded we might - * as well choose alternative 2 now. maybe alternative 3 would be useful + * we use alternative 2 currently. maybe alternative 3 would be useful * in the future. XXX keep in mind for future consideration//rechecking. 
*/ @@ -180,6 +180,7 @@ uvmfault_anonflush(struct vm_anon **anon for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; + KASSERT(rw_lock_held(anons[lcv]->an_lock)); pg = anons[lcv]->an_page; if (pg && (pg->pg_flags & PG_BUSY) == 0) { uvm_lock_pageq(); @@ -270,8 +271,11 @@ uvmfault_anonget(struct uvm_faultinfo *u struct vm_page *pg; int result; + KASSERT(rw_lock_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + result = 0; /* XXX shut up gcc */ - uvmexp.fltanget++; + counters_inc(uvmexp_counters, flt_anget); /* bump rusage counters */ if (anon->an_page) curproc->p_ru.ru_minflt++; @@ -295,14 +299,20 @@ uvmfault_anonget(struct uvm_faultinfo *u if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0) return (VM_PAGER_OK); atomic_setbits_int(&pg->pg_flags, PG_WANTED); - uvmexp.fltpgwait++; + counters_inc(uvmexp_counters, flt_pgwait); /* * the last unlock must be an atomic unlock+wait on * the owner of page */ - uvmfault_unlockall(ufi, amap, NULL); - tsleep_nsec(pg, PVM, "anonget2", INFSLP); + if (pg->uobject) { + uvmfault_unlockall(ufi, amap, NULL); + tsleep_nsec(pg, PVM, "anonget1", INFSLP); + } else { + uvmfault_unlockall(ufi, NULL, NULL); + rwsleep_nsec(pg, anon->an_lock, PVM | PNORELOCK, + "anonget2", INFSLP); + } /* ready to relock and try again */ } else { /* no page, we must try and bring it in. */ @@ -310,7 +320,7 @@ uvmfault_anonget(struct uvm_faultinfo *u if (pg == NULL) { /* out of RAM. */ uvmfault_unlockall(ufi, amap, NULL); - uvmexp.fltnoram++; + counters_inc(uvmexp_counters, flt_noram); uvm_wait("flt_noram1"); /* ready to relock and try again */ } else { @@ -325,7 +335,7 @@ uvmfault_anonget(struct uvm_faultinfo *u * it is ok to read an_swslot here because * we hold PG_BUSY on the page. */ - uvmexp.pageins++; + counters_inc(uvmexp_counters, pageins); result = uvm_swap_get(pg, anon->an_swslot, PGO_SYNCIO); @@ -339,6 +349,9 @@ uvmfault_anonget(struct uvm_faultinfo *u /* now relock and try again */ locked = uvmfault_relock(ufi); + if (locked || we_own) { + rw_enter(anon->an_lock, RW_WRITE); + } /* * if we own the page (i.e. we set PG_BUSY), then we need @@ -366,10 +379,11 @@ uvmfault_anonget(struct uvm_faultinfo *u */ if (pg->pg_flags & PG_RELEASED) { pmap_page_protect(pg, PROT_NONE); - uvm_anfree(anon); /* frees page for us */ + KASSERT(anon->an_ref == 0); if (locked) uvmfault_unlockall(ufi, amap, NULL); - uvmexp.fltpgrele++; + uvm_anon_release(anon); /* frees page for us */ + counters_inc(uvmexp_counters, flt_pgrele); return (VM_PAGER_REFAULT); /* refault! */ } @@ -399,6 +413,7 @@ uvmfault_anonget(struct uvm_faultinfo *u if (locked) uvmfault_unlockall(ufi, amap, NULL); + rw_exit(anon->an_lock); return (VM_PAGER_ERROR); } @@ -413,8 +428,12 @@ uvmfault_anonget(struct uvm_faultinfo *u } /* we were not able to relock. restart fault. */ - if (!locked) + if (!locked) { + if (we_own) { + rw_exit(anon->an_lock); + } return (VM_PAGER_REFAULT); + } /* verify no one touched the amap and moved the anon on us. */ if (ufi != NULL && @@ -426,7 +445,7 @@ uvmfault_anonget(struct uvm_faultinfo *u } /* try it again! */ - uvmexp.fltanretry++; + counters_inc(uvmexp_counters, flt_anretry); continue; } /* while (1) */ @@ -547,7 +566,7 @@ uvm_fault_check(struct uvm_faultinfo *uf /* need to clear */ uvmfault_unlockmaps(ufi, FALSE); uvmfault_amapcopy(ufi); - uvmexp.fltamcopy++; + counters_inc(uvmexp_counters, flt_amcopy); return (ERESTART); } else { /* @@ -603,6 +622,7 @@ uvm_fault_check(struct uvm_faultinfo *uf /* if we've got an amap, extract current anons. 
*/ if (amap) { + amap_lock(amap); amap_lookups(&ufi->entry->aref, flt->startva - ufi->entry->start, *ranons, flt->npages); } else { @@ -623,8 +643,10 @@ uvm_fault_check(struct uvm_faultinfo *uf voff_t uoff; uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset; + KERNEL_LOCK(); (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE); + KERNEL_UNLOCK(); } /* now forget about the backpages */ @@ -654,6 +676,9 @@ uvm_fault_upper(struct uvm_faultinfo *uf struct vm_page *pg = NULL; int error, ret; + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * no matter if we have case 1A or case 1B we are going to need to * have the anon's memory resident. ensure that now. @@ -685,6 +710,9 @@ uvm_fault_upper(struct uvm_faultinfo *uf #endif } + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * if we are case 1B then we will need to allocate a new blank * anon to transfer the data into. note that we have a lock @@ -699,10 +727,11 @@ uvm_fault_upper(struct uvm_faultinfo *uf */ if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) { - uvmexp.flt_acow++; + counters_inc(uvmexp_counters, flt_acow); oanon = anon; /* oanon = old */ anon = uvm_analloc(); if (anon) { + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, 0); } @@ -710,10 +739,12 @@ uvm_fault_upper(struct uvm_faultinfo *uf if (anon == NULL || pg == NULL) { uvmfault_unlockall(ufi, amap, NULL); if (anon == NULL) - uvmexp.fltnoanon++; + counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); - uvmexp.fltnoram++; + counters_inc(uvmexp_counters, flt_noram); } if (uvm_swapisfull()) @@ -745,7 +776,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf * thus, no one can get at it until we are done with it. */ } else { - uvmexp.flt_anon++; + counters_inc(uvmexp_counters, flt_anon); oanon = anon; pg = anon->an_page; if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ @@ -804,7 +835,6 @@ uvm_fault_upper(struct uvm_faultinfo *uf return 0; } - /* * uvm_fault_upper_lookup: look up existing h/w mapping and amap. * @@ -856,12 +886,13 @@ uvm_fault_upper_lookup(struct uvm_faulti continue; } anon = anons[lcv]; + KASSERT(anon->an_lock == amap->am_lock); if (anon->an_page && (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) { uvm_lock_pageq(); uvm_pageactivate(anon->an_page); /* reactivate */ uvm_unlock_pageq(); - uvmexp.fltnamap++; + counters_inc(uvmexp_counters, flt_namap); /* * Since this isn't the page that's actually faulting, @@ -909,7 +940,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad struct vm_page *pages[UVM_MAXRANGE]; int error = ERESTART; - uvmexp.faults++; /* XXX: locking? 
*/ + counters_inc(uvmexp_counters, faults); TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); /* init the IN parameters in the ufi */ @@ -922,7 +953,6 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad else flt.narrow = FALSE; /* normal fault */ - /* * ReFault */ @@ -994,7 +1024,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf * ("get" has the option of doing a pmap_enter for us) */ if (uobj != NULL) { - uvmexp.fltlget++; + counters_inc(uvmexp_counters, flt_lget); gotpages = flt->npages; (void) uobj->pgops->pgo_get(uobj, ufi->entry->offset + (flt->startva - ufi->entry->start), @@ -1038,7 +1068,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf uvm_lock_pageq(); uvm_pageactivate(pages[lcv]); /* reactivate */ uvm_unlock_pageq(); - uvmexp.fltnomap++; + counters_inc(uvmexp_counters, flt_nomap); /* * Since this page isn't the page that's @@ -1109,7 +1139,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf uvmfault_unlockall(ufi, amap, NULL); - uvmexp.fltget++; + counters_inc(uvmexp_counters, flt_get); gotpages = 1; uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset; result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, @@ -1134,6 +1164,8 @@ uvm_fault_lower(struct uvm_faultinfo *uf /* re-verify the state of the world. */ locked = uvmfault_relock(ufi); + if (locked && amap != NULL) + amap_lock(amap); /* * Re-verify that amap slot is still free. if there is @@ -1183,7 +1215,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf * * set "pg" to the page we want to map in (uobjpage, usually) */ - uvmexp.flt_obj++; + counters_inc(uvmexp_counters, flt_obj); if (UVM_ET_ISCOPYONWRITE(ufi->entry)) flt->enter_prot &= ~PROT_WRITE; pg = uobjpage; /* map in the actual object */ @@ -1211,6 +1243,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf * a zero'd, dirty page, so have * uvm_pagealloc() do that for us. */ + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0); } @@ -1235,10 +1268,12 @@ uvm_fault_lower(struct uvm_faultinfo *uf /* unlock and fail ... */ uvmfault_unlockall(ufi, amap, uobj); if (anon == NULL) - uvmexp.fltnoanon++; + counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); - uvmexp.fltnoram++; + counters_inc(uvmexp_counters, flt_noram); } if (uvm_swapisfull()) @@ -1254,7 +1289,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf /* fill in the data */ if (uobjpage != PGO_DONTCARE) { - uvmexp.flt_prcopy++; + counters_inc(uvmexp_counters, flt_prcopy); /* copy page [pg now dirty] */ uvm_pagecopy(uobjpage, pg); @@ -1264,7 +1299,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf */ if ((amap_flags(amap) & AMAP_SHARED) != 0) { pmap_page_protect(uobjpage, PROT_NONE); - } + } /* dispose of uobjpage. drop handle to uobj as well. */ if (uobjpage->pg_flags & PG_WANTED) @@ -1277,7 +1312,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf uvm_unlock_pageq(); uobj = NULL; } else { - uvmexp.flt_przero++; + counters_inc(uvmexp_counters, flt_przero); /* * Page is zero'd and marked dirty by uvm_pagealloc() * above. @@ -1288,7 +1323,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf ufi->orig_rvaddr - ufi->entry->start, anon, 0)) { uvmfault_unlockall(ufi, amap, NULL); uvm_anfree(anon); - uvmexp.fltnoamap++; + counters_inc(uvmexp_counters, flt_noamap); if (uvm_swapisfull()) return (ENOMEM); @@ -1304,6 +1339,12 @@ uvm_fault_lower(struct uvm_faultinfo *uf * all resources are present. we can now map it in and free our * resources. 
*/ + if (amap == NULL) + KASSERT(anon == NULL); + else { + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon == NULL || anon->an_lock == amap->am_lock); + } if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot, access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) { @@ -1489,7 +1530,8 @@ void uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct uvm_object *uobj) { - + if (amap != NULL) + amap_unlock(amap); uvmfault_unlockmaps(ufi, FALSE); } @@ -1580,7 +1622,7 @@ uvmfault_relock(struct uvm_faultinfo *uf return TRUE; } - uvmexp.fltrelck++; + counters_inc(uvmexp_counters, flt_relck); /* * relock map. fail if version mismatch (in which case nothing @@ -1592,6 +1634,6 @@ uvmfault_relock(struct uvm_faultinfo *uf return(FALSE); } - uvmexp.fltrelckok++; + counters_inc(uvmexp_counters, flt_relckok); return(TRUE); /* got it! */ } Index: sys/uvm/uvm_init.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_init.c,v retrieving revision 1.40 diff -u -p -u -p -r1.40 uvm_init.c --- sys/uvm/uvm_init.c 11 May 2017 00:42:05 -0000 1.40 +++ sys/uvm/uvm_init.c 22 Dec 2020 14:38:10 -0000 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,9 @@ struct uvm uvm; /* decl */ struct uvmexp uvmexp; /* decl */ +COUNTERS_BOOT_MEMORY(uvmexp_countersboot, exp_ncounters); +struct cpumem *uvmexp_counters = COUNTERS_BOOT_INITIALIZER(uvmexp_countersboot); + #if defined(VM_MIN_KERNEL_ADDRESS) vaddr_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; #else @@ -184,4 +188,10 @@ uvm_init(void) uaddr_bestfit_create(vm_map_min(kmem_map), vm_map_max(kmem_map))); #endif /* !SMALL_KERNEL */ +} + +void +uvm_init_percpu(void) +{ + uvmexp_counters = counters_alloc_ncpus(uvmexp_counters, exp_ncounters); } Index: sys/uvm/uvm_map.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.269 diff -u -p -u -p -r1.269 uvm_map.c --- sys/uvm/uvm_map.c 19 Oct 2020 08:19:46 -0000 1.269 +++ sys/uvm/uvm_map.c 22 Dec 2020 14:38:10 -0000 @@ -3058,7 +3058,7 @@ uvm_map_init(void) pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, IPL_NONE, PR_WAITOK, "vmsppl", NULL); pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, - IPL_VM, PR_WAITOK, "vmmpepl", NULL); + IPL_NONE, PR_WAITOK | PR_RWLOCK, "vmmpepl", NULL); pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, IPL_VM, 0, "vmmpekpl", NULL); pool_sethiwat(&uvm_map_entry_pool, 8192); @@ -4682,12 +4682,14 @@ uvm_map_clean(struct vm_map *map, vaddr_ cp_start = MAX(entry->start, start); cp_end = MIN(entry->end, end); + amap_lock(amap); for (; cp_start != cp_end; cp_start += PAGE_SIZE) { anon = amap_lookup(&entry->aref, cp_start - entry->start); if (anon == NULL) continue; + KASSERT(anon->an_lock == amap->am_lock); pg = anon->an_page; if (pg == NULL) { continue; @@ -4743,6 +4745,7 @@ deactivate_it: panic("uvm_map_clean: weird flags"); } } + amap_unlock(amap); flush_object: cp_start = MAX(entry->start, start); Index: sys/uvm/uvm_meter.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_meter.c,v retrieving revision 1.41 diff -u -p -u -p -r1.41 uvm_meter.c --- sys/uvm/uvm_meter.c 24 Jun 2020 22:03:45 -0000 1.41 +++ sys/uvm/uvm_meter.c 22 Dec 2020 14:38:10 -0000 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ 
-178,9 +179,42 @@ uvm_sysctl(int *name, u_int namelen, voi return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals, sizeof(vmtotals))); - case VM_UVMEXP: - return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp, - sizeof(uvmexp))); + case VM_UVMEXP: { + struct uvmexp uexp; + uint64_t counters[exp_ncounters]; + + memcpy(&uexp, &uvmexp, sizeof(uexp)); + + counters_read(uvmexp_counters, counters, exp_ncounters); + + /* stat counters */ + uexp.faults = (int)counters[faults]; + uexp.pageins = (int)counters[pageins]; + + /* fault subcounters */ + uexp.fltnoram = (int)counters[flt_noram]; + uexp.fltnoanon = (int)counters[flt_noanon]; + uexp.fltnoamap = (int)counters[flt_noamap]; + uexp.fltpgwait = (int)counters[flt_pgwait]; + uexp.fltpgrele = (int)counters[flt_pgrele]; + uexp.fltrelck = (int)counters[flt_relck]; + uexp.fltrelckok = (int)counters[flt_relckok]; + uexp.fltanget = (int)counters[flt_anget]; + uexp.fltanretry = (int)counters[flt_anretry]; + uexp.fltamcopy = (int)counters[flt_amcopy]; + uexp.fltnamap = (int)counters[flt_namap]; + uexp.fltnomap = (int)counters[flt_nomap]; + uexp.fltlget = (int)counters[flt_lget]; + uexp.fltget = (int)counters[flt_get]; + uexp.flt_anon = (int)counters[flt_anon]; + uexp.flt_acow = (int)counters[flt_acow]; + uexp.flt_obj = (int)counters[flt_obj]; + uexp.flt_prcopy = (int)counters[flt_prcopy]; + uexp.flt_przero = (int)counters[flt_przero]; + + return (sysctl_rdstruct(oldp, oldlenp, newp, &uexp, + sizeof(uexp))); + } case VM_NKMEMPAGES: return (sysctl_rdint(oldp, oldlenp, newp, nkmempages)); Index: sys/uvm/uvm_page.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_page.c,v retrieving revision 1.154 diff -u -p -u -p -r1.154 uvm_page.c --- sys/uvm/uvm_page.c 2 Dec 2020 16:32:00 -0000 1.154 +++ sys/uvm/uvm_page.c 22 Dec 2020 14:38:10 -0000 @@ -1050,7 +1050,8 @@ uvm_page_unbusy(struct vm_page **pgs, in } else { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); - uvm_anfree(pg->uanon); + rw_enter(pg->uanon->an_lock, RW_WRITE); + uvm_anon_release(pg->uanon); } } else { atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); Index: sys/uvm/uvm_pager.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_pager.c,v retrieving revision 1.73 diff -u -p -u -p -r1.73 uvm_pager.c --- sys/uvm/uvm_pager.c 21 Oct 2020 09:08:14 -0000 1.73 +++ sys/uvm/uvm_pager.c 22 Dec 2020 14:38:10 -0000 @@ -649,7 +649,8 @@ uvm_pager_dropcluster(struct uvm_object UVM_PAGE_OWN(ppsp[lcv], NULL); /* kills anon and frees pg */ - uvm_anfree(ppsp[lcv]->uanon); + rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); + uvm_anon_release(ppsp[lcv]->uanon); continue; } else { Index: sys/uvm/uvmexp.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvmexp.h,v retrieving revision 1.7 diff -u -p -u -p -r1.7 uvmexp.h --- sys/uvm/uvmexp.h 14 Dec 2020 13:29:18 -0000 1.7 +++ sys/uvm/uvmexp.h 22 Dec 2020 14:38:10 -0000 @@ -156,4 +156,41 @@ struct _ps_strings { void *val; }; +#ifdef _KERNEL + +/* + * Per-cpu UVM counters. 
+ */ +extern struct cpumem *uvmexp_counters; + +enum uvm_exp_counters { + /* stat counters */ + faults, /* page fault count */ + pageins, /* pagein operation count */ + + /* fault subcounters */ + flt_noram, /* number of times fault was out of ram */ + flt_noanon, /* number of times fault was out of anons */ + flt_noamap, /* number of times fault was out of amap chunks */ + flt_pgwait, /* number of times fault had to wait on a page */ + flt_pgrele, /* number of times fault found a released page */ + flt_relck, /* number of times fault relock called */ + flt_relckok, /* number of times fault relock is a success */ + flt_anget, /* number of times fault gets anon page */ + flt_anretry, /* number of times fault retrys an anon get */ + flt_amcopy, /* number of times fault clears "needs copy" */ + flt_namap, /* number of times fault maps a neighbor anon page */ + flt_nomap, /* number of times fault maps a neighbor obj page */ + flt_lget, /* number of times fault does a locked pgo_get */ + flt_get, /* number of times fault does an unlocked get */ + flt_anon, /* number of times fault anon (case 1a) */ + flt_acow, /* number of times fault anon cow (case 1b) */ + flt_obj, /* number of times fault is on object page (2a) */ + flt_prcopy, /* number of times fault promotes with copy (2b) */ + flt_przero, /* number of times fault promotes with zerofill (2b) */ + + exp_ncounters +}; + +#endif /* _KERNEL */ #endif /*_UVM_UVMEXP_ */
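
Below is a minimal usage sketch (not part of the diff, illustration only) of the reference-counted
rwlock objects added to kern_rwlock.c.  It mirrors how amap_copy() shares srcamap->am_lock with the
new amap via rw_obj_hold(), and how amap_free()/uvm_anon_release() drop the last reference with
rw_obj_free().  "struct foo" and the foo_* functions are hypothetical names invented for the
example; rw_obj_alloc(), rw_obj_hold(), rw_obj_free(), rw_enter_write() and rw_exit_write() are the
interfaces this diff adds or relies on.

/*
 * Illustrative sketch: two objects sharing one allocated,
 * reference-counted rwlock, in the style of amap lock sharing.
 */
#include <sys/rwlock.h>

struct foo {
	struct rwlock	*f_lock;	/* allocated, possibly shared */
	int		 f_data;
};

void
foo_attach(struct foo *child, struct foo *parent)
{
	if (parent != NULL) {
		/* Share the parent's lock; take an extra reference. */
		child->f_lock = parent->f_lock;
		rw_obj_hold(child->f_lock);
	} else {
		/* Nothing in common: allocate a fresh lock object. */
		rw_obj_alloc(&child->f_lock, "foolk");
	}
}

void
foo_detach(struct foo *f)
{
	struct rwlock *lock = f->f_lock;

	rw_enter_write(lock);
	f->f_data = 0;
	f->f_lock = NULL;
	rw_exit_write(lock);
	/* Drop one reference; the object is freed only on the last one. */
	rw_obj_free(lock);
}

As in amap_copy(), the decision to share rather than allocate is made while the source object's
lock is held, so the reference count and the set of structures protected by the lock stay
consistent; the lock must not be freed while write-held, matching the KASSERT in amap_free().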