Index: sys/kern/init_main.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/init_main.c,v retrieving revision 1.303 diff -u -p -u -p -r1.303 init_main.c --- sys/kern/init_main.c 28 Dec 2020 14:01:23 -0000 1.303 +++ sys/kern/init_main.c 1 Jan 2021 02:57:40 -0000 @@ -232,6 +232,7 @@ main(void *framep) KERNEL_LOCK_INIT(); SCHED_LOCK_INIT(); + rw_obj_init(); uvm_init(); disk_init(); /* must come before autoconfiguration */ tty_init(); /* initialise tty's */ Index: sys/kern/kern_rwlock.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.45 diff -u -p -u -p -r1.45 kern_rwlock.c --- sys/kern/kern_rwlock.c 2 Mar 2020 17:07:49 -0000 1.45 +++ sys/kern/kern_rwlock.c 1 Jan 2021 02:57:40 -0000 @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -487,4 +488,124 @@ int rrw_status(struct rrwlock *rrwl) { return (rw_status(&rrwl->rrwl_lock)); +} + +/*- + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define RWLOCK_OBJ_MAGIC 0x5aa3c85d +struct rwlock_obj { + struct rwlock ro_lock; + u_int ro_magic; + u_int ro_refcnt; +}; + + +struct pool rwlock_obj_pool; + +/* + * rw_obj_init: + * + * Initialize the mutex object store. + */ +void +rw_obj_init(void) +{ + pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_NONE, + PR_WAITOK | PR_RWLOCK, "rwobjpl", NULL); +} + +/* + * rw_obj_alloc: + * + * Allocate a single lock object. + */ +void +_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags, + struct lock_type *type) +{ + struct rwlock_obj *mo; + + mo = pool_get(&rwlock_obj_pool, PR_WAITOK); + mo->ro_magic = RWLOCK_OBJ_MAGIC; + _rw_init_flags(&mo->ro_lock, name, flags, type); + mo->ro_refcnt = 1; + + *lock = &mo->ro_lock; +} + +/* + * rw_obj_hold: + * + * Add a single reference to a lock object. A reference to the object + * must already be held, and must be held across this call. 
+ */ + +void +rw_obj_hold(struct rwlock *lock) +{ + struct rwlock_obj *mo = (struct rwlock_obj *)lock; + + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC, + "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)", + __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC); + KASSERTMSG(mo->ro_refcnt > 0, + "%s: lock %p: mo->ro_refcnt (%#x) == 0", + __func__, mo, mo->ro_refcnt); + + atomic_inc_int(&mo->ro_refcnt); +} + +/* + * rw_obj_free: + * + * Drop a reference from a lock object. If the last reference is being + * dropped, free the object and return true. Otherwise, return false. + */ +int +rw_obj_free(struct rwlock *lock) +{ + struct rwlock_obj *mo = (struct rwlock_obj *)lock; + + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC, + "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)", + __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC); + KASSERTMSG(mo->ro_refcnt > 0, + "%s: lock %p: mo->ro_refcnt (%#x) == 0", + __func__, mo, mo->ro_refcnt); + + if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) { + return false; + } +#if notyet + WITNESS_DESTROY(&mo->ro_lock); +#endif + pool_put(&rwlock_obj_pool, mo); + return true; } Index: sys/kern/subr_pool.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.230 diff -u -p -u -p -r1.230 subr_pool.c --- sys/kern/subr_pool.c 24 Jan 2020 06:31:17 -0000 1.230 +++ sys/kern/subr_pool.c 1 Jan 2021 02:57:40 -0000 @@ -563,8 +563,6 @@ pool_get(struct pool *pp, int flags) int slowdown = 0; KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); - if (pp->pr_flags & PR_RWLOCK) - KASSERT(flags & PR_WAITOK); #ifdef MULTIPROCESSOR if (pp->pr_cache != NULL) { Index: sys/sys/rwlock.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/sys/rwlock.h,v retrieving revision 1.27 diff -u -p -u -p -r1.27 rwlock.h --- sys/sys/rwlock.h 15 Dec 2020 10:23:01 -0000 1.27 +++ sys/sys/rwlock.h 1 Jan 2021 02:57:40 -0000 @@ -209,6 +209,28 @@ int rrw_status(struct rrwlock *); #define rrw_init(rrwl, name) _rrw_init_flags(rrwl, name, 0, NULL) #endif /* WITNESS */ + +/* + * Allocated, reference-counted rwlocks + */ + +#ifdef WITNESS +#define rw_obj_alloc_flags(rwl, name, flags) do { \ + static struct lock_type __lock_type = { .lt_name = #rwl }; \ + _rw_obj_alloc_flags(rwl, name, flags, &__lock_type); \ +} while (0) +#else +#define rw_obj_alloc_flags(rwl, name, flags) \ + _rw_obj_alloc_flags(rwl, name, flags, NULL) +#endif +#define rw_obj_alloc(rwl, name) rw_obj_alloc_flags(rwl, name, 0) + +void rw_obj_init(void); +void _rw_obj_alloc_flags(struct rwlock **, const char *, int, + struct lock_type *); +void rw_obj_hold(struct rwlock *); +int rw_obj_free(struct rwlock *); + #endif /* _KERNEL */ #endif /* _SYS_RWLOCK_H */ Index: sys/uvm/uvm_amap.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_amap.c,v retrieving revision 1.86 diff -u -p -u -p -r1.86 uvm_amap.c --- sys/uvm/uvm_amap.c 13 Nov 2020 11:11:48 -0000 1.86 +++ sys/uvm/uvm_amap.c 1 Jan 2021 02:57:40 -0000 @@ -55,6 +55,9 @@ struct pool uvm_small_amap_pool[UVM_AMAP struct pool uvm_amap_chunk_pool; LIST_HEAD(, vm_amap) amap_list; +struct rwlock amap_list_lock = RWLOCK_INITIALIZER("amaplstlk"); +#define amap_lock_list() rw_enter_write(&amap_list_lock) +#define amap_unlock_list() rw_exit_write(&amap_list_lock) static char amap_small_pool_names[UVM_AMAP_CHUNK][9]; @@ -89,13 +92,17 @@ void amap_wiperange(struct vm_amap *, in static inline void 
amap_list_insert(struct vm_amap *amap) { + amap_lock_list(); LIST_INSERT_HEAD(&amap_list, amap, am_list); + amap_unlock_list(); } static inline void amap_list_remove(struct vm_amap *amap) -{ +{ + amap_lock_list(); LIST_REMOVE(amap, am_list); + amap_unlock_list(); } /* @@ -249,7 +256,7 @@ amap_init(void) /* Initialize the vm_amap pool. */ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), - 0, IPL_NONE, PR_WAITOK, "amappl", NULL); + 0, IPL_NONE, PR_WAITOK | PR_RWLOCK, "amappl", NULL); pool_sethiwat(&uvm_amap_pool, 4096); /* initialize small amap pools */ @@ -258,13 +265,13 @@ amap_init(void) sizeof(amap_small_pool_names[0]), "amappl%d", i + 1); size = offsetof(struct vm_amap, am_small.ac_anon) + (i + 1) * sizeof(struct vm_anon *); - pool_init(&uvm_small_amap_pool[i], size, 0, - IPL_NONE, 0, amap_small_pool_names[i], NULL); + pool_init(&uvm_small_amap_pool[i], size, 0, IPL_NONE, + PR_WAITOK | PR_RWLOCK, amap_small_pool_names[i], NULL); } pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) + UVM_AMAP_CHUNK * sizeof(struct vm_anon *), - 0, IPL_NONE, 0, "amapchunkpl", NULL); + 0, IPL_NONE, PR_WAITOK | PR_RWLOCK, "amapchunkpl", NULL); pool_sethiwat(&uvm_amap_chunk_pool, 4096); } @@ -332,6 +339,7 @@ amap_alloc1(int slots, int waitf, int la if (amap == NULL) return(NULL); + amap->am_lock = NULL; amap->am_ref = 1; amap->am_flags = 0; #ifdef UVM_AMAP_PPREF @@ -389,6 +397,12 @@ fail1: return (NULL); } +static void +amap_lock_alloc(struct vm_amap *amap) +{ + rw_obj_alloc(&amap->am_lock, "amaplk"); +} + /* * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM * @@ -406,8 +420,10 @@ amap_alloc(vaddr_t sz, int waitf, int la return (NULL); amap = amap_alloc1(slots, waitf, lazyalloc); - if (amap) + if (amap != NULL) { + amap_lock_alloc(amap); amap_list_insert(amap); + } return(amap); } @@ -426,6 +442,11 @@ amap_free(struct vm_amap *amap) KASSERT(amap->am_ref == 0 && amap->am_nused == 0); KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0); + if (amap->am_lock != NULL) { + KASSERT(amap->am_lock == NULL || !rw_write_held(amap->am_lock)); + rw_obj_free(amap->am_lock); + } + #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) free(amap->am_ppref, M_UVMAMAP, amap->am_nslot * sizeof(int)); @@ -447,6 +468,7 @@ amap_free(struct vm_amap *amap) * * => called from amap_unref when the final reference to an amap is * discarded (i.e. when reference count == 1) + * => amap must be locked. */ void @@ -457,15 +479,16 @@ amap_wipeout(struct vm_amap *amap) struct vm_amap_chunk *chunk; struct pglist pgl; + KASSERT(rw_write_held(amap->am_lock)); KASSERT(amap->am_ref == 0); if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) { /* amap_swap_off will call us again. */ + amap_unlock(amap); return; } TAILQ_INIT(&pgl); - amap_list_remove(amap); AMAP_CHUNK_FOREACH(chunk, amap) { @@ -478,6 +501,7 @@ amap_wipeout(struct vm_amap *amap) if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); + KASSERT(anon->an_lock == amap->am_lock); refs = --anon->an_ref; if (refs == 0) { @@ -495,7 +519,8 @@ amap_wipeout(struct vm_amap *amap) /* now we free the map */ amap->am_ref = 0; /* ... was one */ amap->am_nused = 0; - amap_free(amap); /* will free amap */ + amap_unlock(amap); + amap_free(amap); } /* @@ -503,6 +528,8 @@ amap_wipeout(struct vm_amap *amap) * by copying the amap if necessary. * * => an entry with a null amap pointer will get a new (blank) one. + * => the map that the map entry blocks to must be locked by caller. 
+ * => the amap (if any) currently attached to the entry must be unlocked. * => if canchunk is true, then we may clip the entry into a chunk * => "startva" and "endva" are used only if canchunk is true. they are * used to limit chunking (e.g. if you have a large space that you @@ -519,6 +546,9 @@ amap_copy(struct vm_map *map, struct vm_ vaddr_t chunksize; int i, j, k, n, srcslot; struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL; + struct vm_anon *anon; + + KASSERT(map != kernel_map); /* we use sleeping locks */ /* is there a map to copy? if not, create one from scratch. */ if (entry->aref.ar_amap == NULL) { @@ -574,6 +604,8 @@ amap_copy(struct vm_map *map, struct vm_ return; srcamap = entry->aref.ar_amap; + amap_lock(srcamap); + /* * need to double check reference count now. the reference count * could have changed while we were in malloc. if the reference count @@ -582,6 +614,7 @@ amap_copy(struct vm_map *map, struct vm_ */ if (srcamap->am_ref == 1) { /* take it over? */ entry->etype &= ~UVM_ET_NEEDSCOPY; + amap_unlock(srcamap); amap->am_ref--; /* drop final reference to map */ amap_free(amap); /* dispose of new (unused) amap */ return; @@ -606,18 +639,21 @@ amap_copy(struct vm_map *map, struct vm_ chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT); if (chunk == NULL) { + amap_unlock(srcamap); amap->am_ref = 0; amap_wipeout(amap); return; } for (k = 0; k < n; i++, j++, k++) { - chunk->ac_anon[i] = srcchunk->ac_anon[j]; - if (chunk->ac_anon[i] == NULL) + chunk->ac_anon[i] = anon = srcchunk->ac_anon[j]; + if (anon == NULL) continue; + KASSERT(anon->an_lock == srcamap->am_lock); + KASSERT(anon->an_ref > 0); chunk->ac_usedmap |= (1 << i); - chunk->ac_anon[i]->an_ref++; + anon->an_ref++; amap->am_nused++; } } @@ -629,6 +665,8 @@ amap_copy(struct vm_map *map, struct vm_ * the count to zero. [and no need to worry about freeing it] */ srcamap->am_ref--; + KASSERT(srcamap->am_ref > 0); + if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ #ifdef UVM_AMAP_PPREF @@ -638,6 +676,20 @@ amap_copy(struct vm_map *map, struct vm_ } #endif + /* + * If we referenced any anons, then share the source amap's lock. + * Otherwise, we have nothing in common, so allocate a new one. + */ + KASSERT(amap->am_lock == NULL); + if (amap->am_nused != 0) { + amap->am_lock = srcamap->am_lock; + rw_obj_hold(amap->am_lock); + } + amap_unlock(srcamap); + + if (amap->am_lock == NULL) + amap_lock_alloc(amap); + /* install new amap. */ entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap; @@ -655,6 +707,7 @@ amap_copy(struct vm_map *map, struct vm_ * so we resolve the COW here. * * => assume parent's entry was wired, thus all pages are resident. + * => the parent and child vm_map must both be locked. * => caller passes child's map/entry in to us * => XXXCDC: out of memory should cause fork to fail, but there is * currently no easy way to do this (needs fix) @@ -675,6 +728,7 @@ amap_cow_now(struct vm_map *map, struct * am_anon[] array on us. 
*/ ReStart: + amap_lock(amap); AMAP_CHUNK_FOREACH(chunk, amap) { int i, map = chunk->ac_usedmap; @@ -683,6 +737,7 @@ ReStart: map ^= 1 << slot; anon = chunk->ac_anon[slot]; pg = anon->an_page; + KASSERT(anon->an_lock == amap->am_lock); /* page must be resident since parent is wired */ KASSERT(pg != NULL); @@ -700,24 +755,27 @@ ReStart: */ if (pg->pg_flags & PG_BUSY) { atomic_setbits_int(&pg->pg_flags, PG_WANTED); - tsleep_nsec(pg, PVM, "cownow", INFSLP); + rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK, + "cownow", INFSLP); goto ReStart; } /* ok, time to do a copy-on-write to a new anon */ nanon = uvm_analloc(); - if (nanon) { + if (nanon != NULL) { + /* the new anon will share the amap's lock */ + nanon->an_lock = amap->am_lock; npg = uvm_pagealloc(NULL, 0, nanon, 0); } else npg = NULL; /* XXX: quiet gcc warning */ if (nanon == NULL || npg == NULL) { /* out of memory */ - /* - * XXXCDC: we should cause fork to fail, but - * we can't ... - */ - if (nanon) { + amap_unlock(amap); + if (nanon != NULL) { + nanon->an_lock = NULL; + nanon->an_ref--; + KASSERT(nanon->an_ref == 0); uvm_anfree(nanon); } uvm_wait("cownowpage"); @@ -730,6 +788,7 @@ ReStart: */ uvm_pagecopy(pg, npg); /* old -> new */ anon->an_ref--; /* can't drop to zero */ + KASSERT(anon->an_ref > 0); chunk->ac_anon[slot] = nanon; /* replace */ /* @@ -744,6 +803,7 @@ ReStart: uvm_unlock_pageq(); } } + amap_unlock(amap); } /* @@ -757,10 +817,13 @@ amap_splitref(struct vm_aref *origref, s struct vm_amap *amap = origref->ar_amap; int leftslots; + KASSERT(splitref->ar_amap == amap); AMAP_B2SLOT(leftslots, offset); if (leftslots == 0) panic("amap_splitref: split at zero offset"); + amap_lock(amap); + /* now: we have a valid am_mapped array. */ if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0) panic("amap_splitref: map size check failed"); @@ -775,6 +838,7 @@ amap_splitref(struct vm_aref *origref, s amap->am_ref++; splitref->ar_amap = amap; splitref->ar_pageoff = origref->ar_pageoff + leftslots; + amap_unlock(amap); } #ifdef UVM_AMAP_PPREF @@ -786,6 +850,7 @@ void amap_pp_establish(struct vm_amap *amap) { + KASSERT(rw_write_held(amap->am_lock)); amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int), M_UVMAMAP, M_NOWAIT|M_ZERO); @@ -811,6 +876,8 @@ amap_pp_adjref(struct vm_amap *amap, int int stopslot, *ppref, lcv, prevlcv; int ref, len, prevref, prevlen; + KASSERT(rw_write_held(amap->am_lock)); + stopslot = curslot + slotlen; ppref = amap->am_ppref; prevlcv = 0; @@ -893,6 +960,7 @@ amap_wiperange_chunk(struct vm_amap *ama map ^= 1 << curslot; chunk->ac_usedmap ^= 1 << curslot; anon = chunk->ac_anon[curslot]; + KASSERT(anon->an_lock == amap->am_lock); /* remove it from the amap */ chunk->ac_anon[curslot] = NULL; @@ -902,10 +970,6 @@ amap_wiperange_chunk(struct vm_amap *ama /* drop anon reference count */ refs = --anon->an_ref; if (refs == 0) { - /* - * we just eliminated the last reference to an - * anon. free it. 
- */ uvm_anfree(anon); } } @@ -921,6 +985,8 @@ amap_wiperange(struct vm_amap *amap, int int bucket, startbucket, endbucket; struct vm_amap_chunk *chunk, *nchunk; + KASSERT(rw_write_held(amap->am_lock)); + startbucket = UVM_AMAP_BUCKET(amap, slotoff); endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1); @@ -980,12 +1046,24 @@ amap_swap_off(int startslot, int endslot { struct vm_amap *am; struct vm_amap *am_next; + struct vm_amap marker; boolean_t rv = FALSE; + amap_lock_list(); for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) { int i, map; struct vm_amap_chunk *chunk; + amap_lock(am); + if (am->am_nused == 0) { + amap_unlock(am); + am_next = LIST_NEXT(am, am_list); + continue; + } + + LIST_INSERT_AFTER(am, &marker, am_list); + amap_unlock_list(); + again: AMAP_CHUNK_FOREACH(chunk, am) { map = chunk->ac_usedmap; @@ -1005,20 +1083,28 @@ again: am->am_flags |= AMAP_SWAPOFF; - rv = uvm_anon_pagein(anon); + rv = uvm_anon_pagein(am, anon); + amap_lock(am); am->am_flags &= ~AMAP_SWAPOFF; - if (rv || amap_refs(am) == 0) + if (amap_refs(am) == 0) { + amap_wipeout(am); + am = NULL; + goto nextamap; + } + if (rv) goto nextamap; goto again; } } - nextamap: - am_next = LIST_NEXT(am, am_list); - if (amap_refs(am) == 0) - amap_wipeout(am); + if (am != NULL) + amap_unlock(am); + amap_lock_list(); + am_next = LIST_NEXT(&marker, am_list); + LIST_REMOVE(&marker, am_list); } + amap_unlock_list(); return rv; } @@ -1147,9 +1233,11 @@ amap_add(struct vm_aref *aref, vaddr_t o void amap_unadd(struct vm_aref *aref, vaddr_t offset) { - int slot; struct vm_amap *amap = aref->ar_amap; struct vm_amap_chunk *chunk; + int slot; + + KASSERT(rw_write_held(amap->am_lock)); AMAP_B2SLOT(slot, offset); slot += aref->ar_pageoff; @@ -1176,6 +1264,12 @@ amap_adjref_anons(struct vm_amap *amap, int refv, boolean_t all) { #ifdef UVM_AMAP_PPREF + KASSERT(rw_write_held(amap->am_lock)); + + /* + * We must establish the ppref array before changing am_ref + * so that the ppref values match the current amap refcount. + */ if (amap->am_ppref == NULL && !all && len != amap->am_nslot) { amap_pp_establish(amap); } @@ -1192,32 +1286,37 @@ amap_adjref_anons(struct vm_amap *amap, } } #endif + amap_unlock(amap); } /* - * amap_ref: gain a reference to an amap + * amap_ref: gain a reference to an amap. * - * => "offset" and "len" are in units of pages - * => called at fork time to gain the child's reference + * => amap must not be locked (we will lock). + * => "offset" and "len" are in units of pages. + * => Called at fork time to gain the child's reference. */ void amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags) { - + amap_lock(amap); if (flags & AMAP_SHARED) amap->am_flags |= AMAP_SHARED; amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0); } /* - * amap_unref: remove a reference to an amap + * amap_unref: remove a reference to an amap. * * => All pmap-level references to this amap must be already removed. * => Called from uvm_unmap_detach(); entry is already removed from the map. + * => We will lock amap, so it must be unlocked. 
*/ void amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all) { + amap_lock(amap); + KASSERT(amap->am_ref > 0); if (amap->am_ref == 1) { Index: sys/uvm/uvm_amap.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_amap.h,v retrieving revision 1.32 diff -u -p -u -p -r1.32 uvm_amap.h --- sys/uvm/uvm_amap.h 13 Nov 2020 11:11:49 -0000 1.32 +++ sys/uvm/uvm_amap.h 1 Jan 2021 02:57:40 -0000 @@ -133,6 +133,7 @@ struct vm_amap_chunk { }; struct vm_amap { + struct rwlock *am_lock; /* lock for all vm_amap flags */ int am_ref; /* reference count */ int am_flags; /* flags */ int am_nslot; /* # of slots currently in map */ @@ -260,6 +261,9 @@ struct vm_amap { #define amap_flags(AMAP) ((AMAP)->am_flags) #define amap_refs(AMAP) ((AMAP)->am_ref) + +#define amap_lock(AMAP) rw_enter_write((AMAP)->am_lock) +#define amap_unlock(AMAP) rw_exit_write((AMAP)->am_lock) #endif /* _KERNEL */ Index: sys/uvm/uvm_anon.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_anon.c,v retrieving revision 1.50 diff -u -p -u -p -r1.50 uvm_anon.c --- sys/uvm/uvm_anon.c 24 Nov 2020 13:49:09 -0000 1.50 +++ sys/uvm/uvm_anon.c 1 Jan 2021 02:57:40 -0000 @@ -49,7 +49,7 @@ void uvm_anon_init(void) { pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_NONE, - PR_WAITOK, "anonpl", NULL); + PR_WAITOK | PR_RWLOCK, "anonpl", NULL); pool_sethiwat(&uvm_anon_pool, uvmexp.free / 16); } @@ -63,6 +63,7 @@ uvm_analloc(void) anon = pool_get(&uvm_anon_pool, PR_NOWAIT); if (anon) { + anon->an_lock = NULL; anon->an_ref = 1; anon->an_page = NULL; anon->an_swslot = 0; @@ -71,25 +72,26 @@ uvm_analloc(void) } /* - * uvm_anfree: free a single anon structure + * uvm_anfree_list: free a single anon structure * - * => caller must remove anon from its amap before calling (if it was in - * an amap). + * => anon must be removed from the amap (if anon was in an amap). + * => amap must be locked, if anon was owned by amap. * => we may lock the pageq's. */ void uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl) { - struct vm_page *pg; + struct vm_page *pg = anon->an_page; - /* get page */ - pg = anon->an_page; + KASSERT(anon->an_lock == NULL || rw_write_held(anon->an_lock)); + KASSERT(anon->an_ref == 0); /* - * if we have a resident page, we must dispose of it before freeing - * the anon. + * Dispose of the page, if it is resident. */ - if (pg) { + if (pg != NULL) { + KASSERT(anon->an_lock != NULL); + /* * if page is busy then we just mark it as released (who ever * has it busy must check for this when they wake up). if the @@ -98,6 +100,7 @@ uvm_anfree_list(struct vm_anon *anon, st if ((pg->pg_flags & PG_BUSY) != 0) { /* tell them to dump it when done */ atomic_setbits_int(&pg->pg_flags, PG_RELEASED); + rw_obj_hold(anon->an_lock); return; } pmap_page_protect(pg, PROT_NONE); @@ -115,12 +118,14 @@ uvm_anfree_list(struct vm_anon *anon, st uvm_pagefree(pg); /* bye bye */ uvm_unlock_pageq(); /* free the daemon */ } + } else { + if (anon->an_swslot != 0) { + /* this page is no longer only in swap. */ + KASSERT(uvmexp.swpgonly > 0); + uvmexp.swpgonly--; + } } - if (pg == NULL && anon->an_swslot != 0) { - /* this page is no longer only in swap. */ - KASSERT(uvmexp.swpgonly > 0); - uvmexp.swpgonly--; - } + anon->an_lock = NULL; /* free any swap resources. 
*/ uvm_anon_dropswap(anon); @@ -135,12 +140,6 @@ uvm_anfree_list(struct vm_anon *anon, st pool_put(&uvm_anon_pool, anon); } -void -uvm_anfree(struct vm_anon *anon) -{ - uvm_anfree_list(anon, NULL); -} - /* * uvm_anwait: wait for memory to become available to allocate an anon. */ @@ -155,35 +154,25 @@ uvm_anwait(void) } /* - * uvm_anon_dropswap: release any swap resources from this anon. - */ -void -uvm_anon_dropswap(struct vm_anon *anon) -{ - - if (anon->an_swslot == 0) - return; - - uvm_swap_free(anon->an_swslot, 1); - anon->an_swslot = 0; -} - -/* * fetch an anon's page. * * => returns TRUE if pagein was aborted due to lack of memory. */ boolean_t -uvm_anon_pagein(struct vm_anon *anon) +uvm_anon_pagein(struct vm_amap *amap, struct vm_anon *anon) { struct vm_page *pg; int rv; - rv = uvmfault_anonget(NULL, NULL, anon); + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + + rv = uvmfault_anonget(NULL, amap, anon); switch (rv) { case VM_PAGER_OK: + KASSERT(rw_write_held(anon->an_lock)); break; case VM_PAGER_ERROR: case VM_PAGER_REFAULT: @@ -206,7 +195,9 @@ uvm_anon_pagein(struct vm_anon *anon) * mark it as dirty, clear its swslot and un-busy it. */ pg = anon->an_page; - uvm_swap_free(anon->an_swslot, 1); + if (anon->an_swslot > 0) { + uvm_swap_free(anon->an_swslot, 1); + } anon->an_swslot = 0; atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); @@ -216,6 +207,57 @@ uvm_anon_pagein(struct vm_anon *anon) uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); + rw_exit(anon->an_lock); return FALSE; +} + +/* + * uvm_anon_dropswap: release any swap resources from this anon. + * + * => anon must be locked or have a reference count of 0. + */ +void +uvm_anon_dropswap(struct vm_anon *anon) +{ + KASSERT(anon->an_ref == 0 || rw_lock_held(anon->an_lock)); + + if (anon->an_swslot == 0) + return; + + uvm_swap_free(anon->an_swslot, 1); + anon->an_swslot = 0; +} + + +/* + * uvm_anon_release: release an anon and its page. + * + * => anon should not have any references. + * => anon must be locked. + */ + +void +uvm_anon_release(struct vm_anon *anon) +{ + struct vm_page *pg = anon->an_page; + struct rwlock *lock; + + KASSERT(rw_write_held(anon->an_lock)); + KASSERT(pg != NULL); + KASSERT((pg->pg_flags & PG_RELEASED) != 0); + KASSERT((pg->pg_flags & PG_BUSY) != 0); + KASSERT(pg->uobject == NULL); + KASSERT(pg->uanon == anon); + KASSERT(anon->an_ref == 0); + + uvm_lock_pageq(); + uvm_pagefree(pg); + uvm_unlock_pageq(); + KASSERT(anon->an_page == NULL); + lock = anon->an_lock; + uvm_anfree(anon); + rw_exit(lock); + /* Note: extra reference is held for PG_RELEASED case. 
*/ + rw_obj_free(lock); } Index: sys/uvm/uvm_anon.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_anon.h,v retrieving revision 1.21 diff -u -p -u -p -r1.21 uvm_anon.h --- sys/uvm/uvm_anon.h 4 Jan 2020 16:17:29 -0000 1.21 +++ sys/uvm/uvm_anon.h 1 Jan 2021 02:57:40 -0000 @@ -38,6 +38,8 @@ */ struct vm_anon { + struct rwlock *an_lock; + struct vm_page *an_page; /* if in RAM */ int an_ref; /* reference count */ @@ -78,12 +80,15 @@ struct vm_aref { #ifdef _KERNEL struct vm_anon *uvm_analloc(void); -void uvm_anfree(struct vm_anon *); -void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anfree_list(struct vm_anon *, struct pglist *); +void uvm_anon_release(struct vm_anon *); void uvm_anwait(void); void uvm_anon_init(void); void uvm_anon_dropswap(struct vm_anon *); -boolean_t uvm_anon_pagein(struct vm_anon *); +boolean_t uvm_anon_pagein(struct vm_amap *, struct vm_anon *); + +#define uvm_anfree(an) uvm_anfree_list((an), NULL) + #endif /* _KERNEL */ #endif /* _UVM_UVM_ANON_H_ */ Index: sys/uvm/uvm_aobj.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_aobj.c,v retrieving revision 1.89 diff -u -p -u -p -r1.89 uvm_aobj.c --- sys/uvm/uvm_aobj.c 21 Oct 2020 09:08:14 -0000 1.89 +++ sys/uvm/uvm_aobj.c 1 Jan 2021 02:57:40 -0000 @@ -288,6 +288,8 @@ uao_set_swslot(struct uvm_object *uobj, struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int oldslot; + KERNEL_ASSERT_LOCKED(); + /* if noswap flag is set, then we can't set a slot */ if (aobj->u_flags & UAO_FLAG_NOSWAP) { if (slot == 0) Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.110 diff -u -p -u -p -r1.110 uvm_fault.c --- sys/uvm/uvm_fault.c 28 Dec 2020 14:01:23 -0000 1.110 +++ sys/uvm/uvm_fault.c 1 Jan 2021 02:57:40 -0000 @@ -136,8 +136,7 @@ * by multiple map entries, and figuring out what should wait could be * complex as well...). * - * given that we are not currently multiprocessor or multithreaded we might - * as well choose alternative 2 now. maybe alternative 3 would be useful + * we use alternative 2 currently. maybe alternative 3 would be useful * in the future. XXX keep in mind for future consideration//rechecking. */ @@ -181,6 +180,7 @@ uvmfault_anonflush(struct vm_anon **anon for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; + KASSERT(rw_lock_held(anons[lcv]->an_lock)); pg = anons[lcv]->an_page; if (pg && (pg->pg_flags & PG_BUSY) == 0) { uvm_lock_pageq(); @@ -271,6 +271,9 @@ uvmfault_anonget(struct uvm_faultinfo *u struct vm_page *pg; int result; + KASSERT(rw_lock_held(anon->an_lock)); + KASSERT(anon->an_lock == amap->am_lock); + result = 0; /* XXX shut up gcc */ counters_inc(uvmexp_counters, flt_anget); /* bump rusage counters */ @@ -302,8 +305,14 @@ uvmfault_anonget(struct uvm_faultinfo *u * the last unlock must be an atomic unlock+wait on * the owner of page */ - uvmfault_unlockall(ufi, amap, NULL); - tsleep_nsec(pg, PVM, "anonget2", INFSLP); + if (pg->uobject) { + uvmfault_unlockall(ufi, amap, NULL); + tsleep_nsec(pg, PVM, "anonget1", INFSLP); + } else { + uvmfault_unlockall(ufi, NULL, NULL); + rwsleep_nsec(pg, anon->an_lock, PVM | PNORELOCK, + "anonget2", INFSLP); + } /* ready to relock and try again */ } else { /* no page, we must try and bring it in. 
*/ @@ -340,6 +349,9 @@ uvmfault_anonget(struct uvm_faultinfo *u /* now relock and try again */ locked = uvmfault_relock(ufi); + if (locked || we_own) { + rw_enter(anon->an_lock, RW_WRITE); + } /* * if we own the page (i.e. we set PG_BUSY), then we need @@ -367,9 +379,10 @@ uvmfault_anonget(struct uvm_faultinfo *u */ if (pg->pg_flags & PG_RELEASED) { pmap_page_protect(pg, PROT_NONE); - uvm_anfree(anon); /* frees page for us */ + KASSERT(anon->an_ref == 0); if (locked) uvmfault_unlockall(ufi, amap, NULL); + uvm_anon_release(anon); /* frees page for us */ counters_inc(uvmexp_counters, flt_pgrele); return (VM_PAGER_REFAULT); /* refault! */ } @@ -400,6 +413,7 @@ uvmfault_anonget(struct uvm_faultinfo *u if (locked) uvmfault_unlockall(ufi, amap, NULL); + rw_exit(anon->an_lock); return (VM_PAGER_ERROR); } @@ -414,8 +428,12 @@ uvmfault_anonget(struct uvm_faultinfo *u } /* we were not able to relock. restart fault. */ - if (!locked) + if (!locked) { + if (we_own) { + rw_exit(anon->an_lock); + } return (VM_PAGER_REFAULT); + } /* verify no one touched the amap and moved the anon on us. */ if (ufi != NULL && @@ -604,6 +622,7 @@ uvm_fault_check(struct uvm_faultinfo *uf /* if we've got an amap, extract current anons. */ if (amap) { + amap_lock(amap); amap_lookups(&ufi->entry->aref, flt->startva - ufi->entry->start, *ranons, flt->npages); } else { @@ -624,8 +643,10 @@ uvm_fault_check(struct uvm_faultinfo *uf voff_t uoff; uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset; + KERNEL_LOCK(); (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE); + KERNEL_UNLOCK(); } /* now forget about the backpages */ @@ -655,6 +676,9 @@ uvm_fault_upper(struct uvm_faultinfo *uf struct vm_page *pg = NULL; int error, ret; + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * no matter if we have case 1A or case 1B we are going to need to * have the anon's memory resident. ensure that now. @@ -686,6 +710,9 @@ uvm_fault_upper(struct uvm_faultinfo *uf #endif } + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon->an_lock == amap->am_lock); + /* * if we are case 1B then we will need to allocate a new blank * anon to transfer the data into. note that we have a lock @@ -704,6 +731,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf oanon = anon; /* oanon = old */ anon = uvm_analloc(); if (anon) { + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, 0); } @@ -713,6 +741,8 @@ uvm_fault_upper(struct uvm_faultinfo *uf if (anon == NULL) counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); counters_inc(uvmexp_counters, flt_noram); } @@ -805,7 +835,6 @@ uvm_fault_upper(struct uvm_faultinfo *uf return 0; } - /* * uvm_fault_upper_lookup: look up existing h/w mapping and amap. * @@ -857,6 +886,7 @@ uvm_fault_upper_lookup(struct uvm_faulti continue; } anon = anons[lcv]; + KASSERT(anon->an_lock == amap->am_lock); if (anon->an_page && (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) { uvm_lock_pageq(); @@ -1135,6 +1165,8 @@ uvm_fault_lower(struct uvm_faultinfo *uf /* re-verify the state of the world. */ locked = uvmfault_relock(ufi); + if (locked && amap != NULL) + amap_lock(amap); /* * Re-verify that amap slot is still free. if there is @@ -1212,6 +1244,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf * a zero'd, dirty page, so have * uvm_pagealloc() do that for us. */ + anon->an_lock = amap->am_lock; pg = uvm_pagealloc(NULL, 0, anon, (uobjpage == PGO_DONTCARE) ? 
UVM_PGA_ZERO : 0); } @@ -1238,6 +1271,8 @@ uvm_fault_lower(struct uvm_faultinfo *uf if (anon == NULL) counters_inc(uvmexp_counters, flt_noanon); else { + anon->an_lock = NULL; + anon->an_ref--; uvm_anfree(anon); counters_inc(uvmexp_counters, flt_noram); } @@ -1265,7 +1300,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf */ if ((amap_flags(amap) & AMAP_SHARED) != 0) { pmap_page_protect(uobjpage, PROT_NONE); - } + } /* dispose of uobjpage. drop handle to uobj as well. */ if (uobjpage->pg_flags & PG_WANTED) @@ -1305,6 +1340,12 @@ uvm_fault_lower(struct uvm_faultinfo *uf * all resources are present. we can now map it in and free our * resources. */ + if (amap == NULL) + KASSERT(anon == NULL); + else { + KASSERT(rw_write_held(amap->am_lock)); + KASSERT(anon == NULL || anon->an_lock == amap->am_lock); + } if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot, access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) { @@ -1490,7 +1531,8 @@ void uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct uvm_object *uobj) { - + if (amap != NULL) + amap_unlock(amap); uvmfault_unlockmaps(ufi, FALSE); } Index: sys/uvm/uvm_map.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.269 diff -u -p -u -p -r1.269 uvm_map.c --- sys/uvm/uvm_map.c 19 Oct 2020 08:19:46 -0000 1.269 +++ sys/uvm/uvm_map.c 1 Jan 2021 02:57:40 -0000 @@ -1104,10 +1104,8 @@ uvm_mapanon(struct vm_map *map, vaddr_t if (flags & UVM_FLAG_CONCEAL) entry->etype |= UVM_ET_CONCEAL; if (flags & UVM_FLAG_OVERLAY) { - KERNEL_LOCK(); entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); - KERNEL_UNLOCK(); } /* Update map and process statistics. 
*/ @@ -2833,9 +2831,7 @@ uvm_map_splitentry(struct vm_map *map, s orig->end = next->start = split; if (next->aref.ar_amap) { - KERNEL_LOCK(); amap_splitref(&orig->aref, &next->aref, adj); - KERNEL_UNLOCK(); } if (UVM_ET_ISSUBMAP(orig)) { uvm_map_reference(next->object.sub_map); @@ -4682,12 +4678,14 @@ uvm_map_clean(struct vm_map *map, vaddr_ cp_start = MAX(entry->start, start); cp_end = MIN(entry->end, end); + amap_lock(amap); for (; cp_start != cp_end; cp_start += PAGE_SIZE) { anon = amap_lookup(&entry->aref, cp_start - entry->start); if (anon == NULL) continue; + KASSERT(anon->an_lock == amap->am_lock); pg = anon->an_page; if (pg == NULL) { continue; @@ -4743,6 +4741,7 @@ deactivate_it: panic("uvm_map_clean: weird flags"); } } + amap_unlock(amap); flush_object: cp_start = MAX(entry->start, start); Index: sys/uvm/uvm_page.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_page.c,v retrieving revision 1.154 diff -u -p -u -p -r1.154 uvm_page.c --- sys/uvm/uvm_page.c 2 Dec 2020 16:32:00 -0000 1.154 +++ sys/uvm/uvm_page.c 1 Jan 2021 02:57:40 -0000 @@ -1050,7 +1050,8 @@ uvm_page_unbusy(struct vm_page **pgs, in } else { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); - uvm_anfree(pg->uanon); + rw_enter(pg->uanon->an_lock, RW_WRITE); + uvm_anon_release(pg->uanon); } } else { atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); Index: sys/uvm/uvm_pager.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/uvm/uvm_pager.c,v retrieving revision 1.73 diff -u -p -u -p -r1.73 uvm_pager.c --- sys/uvm/uvm_pager.c 21 Oct 2020 09:08:14 -0000 1.73 +++ sys/uvm/uvm_pager.c 1 Jan 2021 02:57:40 -0000 @@ -649,7 +649,8 @@ uvm_pager_dropcluster(struct uvm_object UVM_PAGE_OWN(ppsp[lcv], NULL); /* kills anon and frees pg */ - uvm_anfree(ppsp[lcv]->uanon); + rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); + uvm_anon_release(ppsp[lcv]->uanon); continue; } else {
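
A usage note on the reference-counted rwlocks introduced in kern_rwlock.c: amap_copy() above lets the new amap adopt the source amap's lock when any anons are shared, and amap_free() later drops that reference. The sketch below is an illustration of that lifecycle only, not code from this diff; the function name example_share_amap_lock() is invented for the example, and the header list is an assumption.

/* Illustration only -- not part of the diff. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/rwlock.h>
#include <uvm/uvm.h>

void
example_share_amap_lock(struct vm_amap *src, struct vm_amap *dst)
{
	/* src got its lock from rw_obj_alloc() (amap_lock_alloc() above). */
	KASSERT(src->am_lock != NULL && dst->am_lock == NULL);

	dst->am_lock = src->am_lock;
	rw_obj_hold(dst->am_lock);	/* take a reference for dst */

	/*
	 * Dropping dst's reference does not destroy the lock as long as
	 * src (or anyone else) still holds one; rw_obj_free() returns
	 * true only when the last reference goes away.
	 */
	rw_obj_free(dst->am_lock);
	dst->am_lock = NULL;
}

This mirrors what amap_copy() and amap_free() do in the diff, with the allocation side handled by amap_lock_alloc().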
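
The amap_lock()/amap_unlock() macros added to uvm_amap.h, together with the an_lock assertions added throughout uvm_amap.c and uvm_fault.c, imply the caller-side pattern below when walking an entry's anons; this condensed sketch restates the pattern used by the uvm_map_clean() hunk above (the function name is invented for illustration).

/* Illustration only -- not part of the diff. */
#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>

void
example_amap_walk(struct vm_map_entry *entry, vaddr_t va)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon;

	amap_lock(amap);
	anon = amap_lookup(&entry->aref, va - entry->start);
	if (anon != NULL) {
		/* Anons attached to an amap share the amap's rwlock. */
		KASSERT(anon->an_lock == amap->am_lock);
		/* ... inspect or modify anon->an_page here ... */
	}
	amap_unlock(amap);
}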
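
Finally, both amap_cow_now() and uvmfault_anonget() replace tsleep_nsec() with rwsleep_nsec() plus PNORELOCK when waiting on a busy anon page, so the amap lock is released across the sleep and the caller must re-take it and re-validate the amap before continuing. A condensed sketch of that pattern, again illustration only with an invented function name and assumed headers:

/* Illustration only -- not part of the diff. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <uvm/uvm.h>

void
example_wait_for_busy_page(struct vm_amap *amap, struct vm_page *pg)
{
	KASSERT(rw_write_held(amap->am_lock));

	atomic_setbits_int(&pg->pg_flags, PG_WANTED);
	rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK, "example", INFSLP);

	/* am_lock is no longer held here; callers restart their walk. */
}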