Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.104 diff -u -p -u -p -r1.104 uipc_socket2.c --- sys/kern/uipc_socket2.c 11 Apr 2020 14:07:06 -0000 1.104 +++ sys/kern/uipc_socket2.c 15 Jan 2021 19:04:21 -0000 @@ -53,6 +53,8 @@ u_long sb_max = SB_MAX; /* patchable */ extern struct pool mclpools[]; extern struct pool mbpool; +extern struct rwlock unp_lock; + /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the @@ -282,6 +284,8 @@ solock(struct socket *so) NET_LOCK(); break; case PF_UNIX: + rw_enter_write(&unp_lock); + break; case PF_ROUTE: case PF_KEY: default: @@ -306,6 +310,8 @@ sounlock(struct socket *so, int s) NET_UNLOCK(); break; case PF_UNIX: + rw_exit_write(&unp_lock); + break; case PF_ROUTE: case PF_KEY: default: @@ -323,6 +329,8 @@ soassertlocked(struct socket *so) NET_ASSERT_LOCKED(); break; case PF_UNIX: + rw_assert_wrlock(&unp_lock); + break; case PF_ROUTE: case PF_KEY: default: @@ -335,12 +343,24 @@ int sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, uint64_t nsecs) { - if ((so->so_proto->pr_domain->dom_family != PF_UNIX) && - (so->so_proto->pr_domain->dom_family != PF_ROUTE) && - (so->so_proto->pr_domain->dom_family != PF_KEY)) { - return rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); - } else - return tsleep_nsec(ident, prio, wmesg, nsecs); + int ret; + + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); + break; + case PF_UNIX: + ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs); + break; + case PF_ROUTE: + case PF_KEY: + default: + ret = tsleep_nsec(ident, prio, wmesg, nsecs); + break; + } + + return ret; } /* Index: sys/kern/uipc_usrreq.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_usrreq.c,v retrieving revision 1.142 diff -u -p -u -p -r1.142 uipc_usrreq.c --- sys/kern/uipc_usrreq.c 16 Jul 2019 21:41:37 -0000 1.142 +++ sys/kern/uipc_usrreq.c 15 Jan 2021 19:04:21 -0000 @@ -51,10 +51,18 @@ #include #include #include +#include + +/* + * Locks used to protect global data and struct members: + * I immutable after creation + * U unp_lock + */ +struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); void uipc_setaddr(const struct unpcb *, struct mbuf *); -/* list of all UNIX domain sockets, for unp_gc() */ +/* [U] list of all UNIX domain sockets, for unp_gc() */ LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head); /* @@ -62,10 +70,10 @@ LIST_HEAD(unp_head, unpcb) unp_head = LI * not received and need to be closed. */ struct unp_deferral { - SLIST_ENTRY(unp_deferral) ud_link; - int ud_n; + SLIST_ENTRY(unp_deferral) ud_link; /* [U] */ + int ud_n; /* [I] */ /* followed by ud_n struct fdpass */ - struct fdpass ud_fp[]; + struct fdpass ud_fp[]; /* [I] */ }; void unp_discard(struct fdpass *, int); @@ -74,7 +82,7 @@ void unp_scan(struct mbuf *, void (*)(st int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *); struct pool unpcb_pool; -/* list of sets of files that were sent over sockets that are now closed */ +/* [U] list of sets of files that were sent over sockets that are now closed */ SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred); struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL); @@ -89,7 +97,7 @@ struct task unp_gc_task = TASK_INITIALIZ * need a proper out-of-band */ struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; -ino_t unp_ino; /* prototype for fake inode numbers */ +ino_t unp_ino; /* [U] prototype for fake inode numbers */ void unp_init(void) @@ -351,7 +359,7 @@ u_long unpst_recvspace = PIPSIZ; u_long unpdg_sendspace = 2*1024; /* really max datagram size */ u_long unpdg_recvspace = 4*1024; -int unp_rights; /* file descriptors in flight */ +int unp_rights; /* [U] file descriptors in flight */ int uipc_attach(struct socket *so, int proto) @@ -359,6 +367,8 @@ uipc_attach(struct socket *so, int proto struct unpcb *unp; int error; + rw_assert_wrlock(&unp_lock); + if (so->so_pcb) return EISCONN; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -409,12 +419,21 @@ unp_detach(struct unpcb *unp) { struct vnode *vp; + rw_assert_wrlock(&unp_lock); + LIST_REMOVE(unp, unp_link); if (unp->unp_vnode) { + /* + * We are the only owner of this `unp'. The only access + * to `v_socket' outside unp layer is pointer to integer + * cast. This unlocked cleaning of `v_socket' is safe. + */ unp->unp_vnode->v_socket = NULL; vp = unp->unp_vnode; unp->unp_vnode = NULL; + KERNEL_LOCK(); vrele(vp); + KERNEL_UNLOCK(); } if (unp->unp_conn) unp_disconnect(unp); @@ -439,10 +458,16 @@ unp_bind(struct unpcb *unp, struct mbuf struct nameidata nd; size_t pathlen; - if (unp->unp_vnode != NULL) + if (unp->unp_flags & UNP_BINDING) return (EINVAL); + unp->unp_flags |= UNP_BINDING; + + if (unp->unp_vnode != NULL) { + error = EINVAL; + goto out; + } if ((error = unp_nam2sun(nam, &soun, &pathlen))) - return (error); + goto out; nam2 = m_getclr(M_WAITOK, MT_SONAME); nam2->m_len = sizeof(struct sockaddr_un); @@ -458,10 +483,21 @@ unp_bind(struct unpcb *unp, struct mbuf NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, soun->sun_path, p); nd.ni_pledge = PLEDGE_UNIX; + + /* + * We enforce `i_lock' -> `unplock' because fifo subsystem + * requires this. Socket has associated file descriptor and + * already protected by reference counter. It can't be closed. + */ + + sounlock(unp->unp_socket, SL_LOCKED); + KERNEL_LOCK(); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ - if ((error = namei(&nd)) != 0) { + error = namei(&nd); + solock(unp->unp_socket); + if (error != 0) { m_freem(nam2); - return (error); + goto unlock; } vp = nd.ni_vp; if (vp != NULL) { @@ -472,7 +508,8 @@ unp_bind(struct unpcb *unp, struct mbuf vput(nd.ni_dvp); vrele(vp); m_freem(nam2); - return (EADDRINUSE); + error = EADDRINUSE; + goto unlock; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; @@ -481,7 +518,7 @@ unp_bind(struct unpcb *unp, struct mbuf vput(nd.ni_dvp); if (error) { m_freem(nam2); - return (error); + goto unlock; } unp->unp_addr = nam2; vp = nd.ni_vp; @@ -492,7 +529,12 @@ unp_bind(struct unpcb *unp, struct mbuf unp->unp_connid.pid = p->p_p->ps_pid; unp->unp_flags |= UNP_FEIDSBIND; VOP_UNLOCK(vp); - return (0); +unlock: + KERNEL_UNLOCK(); +out: + unp->unp_flags &= ~UNP_BINDING; + + return error; } int @@ -505,13 +547,29 @@ unp_connect(struct socket *so, struct mb struct nameidata nd; int error; + unp = sotounpcb(so); + if (unp->unp_flags & UNP_CONNECTING) + return EISCONN; + unp->unp_flags |= UNP_CONNECTING; + if ((error = unp_nam2sun(nam, &soun, NULL))) - return (error); + goto out; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); nd.ni_pledge = PLEDGE_UNIX; - if ((error = namei(&nd)) != 0) - return (error); + + /* + * We enforce `i_lock' -> `unplock' because fifo subsystem + * requires this. Socket has associated file descriptor and + * already protected by reference counter. It can't be closed. + */ + + sounlock(so, SL_LOCKED); + KERNEL_LOCK(); + error = namei(&nd); + solock(so); + if (error != 0) + goto unlock; vp = nd.ni_vp; if (vp->v_type != VSOCK) { error = ENOTSOCK; @@ -534,7 +592,6 @@ unp_connect(struct socket *so, struct mb error = ECONNREFUSED; goto bad; } - unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) @@ -553,6 +610,11 @@ unp_connect(struct socket *so, struct mb error = unp_connect2(so, so2); bad: vput(vp); +unlock: + KERNEL_UNLOCK(); +out: + unp->unp_flags &= ~UNP_CONNECTING; + return (error); } @@ -562,6 +624,8 @@ unp_connect2(struct socket *so, struct s struct unpcb *unp = sotounpcb(so); struct unpcb *unp2; + rw_assert_wrlock(&unp_lock); + if (so2->so_type != so->so_type) return (EPROTOTYPE); unp2 = sotounpcb(so2); @@ -635,15 +699,16 @@ unp_drop(struct unpcb *unp, int errno) { struct socket *so = unp->unp_socket; - KERNEL_ASSERT_LOCKED(); + rw_assert_wrlock(&unp_lock); so->so_error = errno; unp_disconnect(unp); if (so->so_head) { so->so_pcb = NULL; /* - * As long as the KERNEL_LOCK() is the default lock for Unix - * sockets, do not release it. + * As long as `unp_lock' is taken before entering + * uipc_usrreq() releasing it here would lead to a + * double unlock. */ sofree(so, SL_NOUNLOCK); m_freem(unp->unp_addr); @@ -685,6 +750,8 @@ unp_externalize(struct mbuf *rights, soc struct file *fp; int nfds, error = 0; + rw_assert_wrlock(&unp_lock); + /* * This code only works because SCM_RIGHTS is the only supported * control message type on unix sockets. Enforce this here. @@ -705,6 +772,10 @@ unp_externalize(struct mbuf *rights, soc /* Make sure the recipient should be able to see the descriptors.. */ rp = (struct fdpass *)CMSG_DATA(cm); + + /* fdp->fd_rdir requires KERNEL_LOCK() */ + KERNEL_LOCK(); + for (i = 0; i < nfds; i++) { fp = rp->fp; rp++; @@ -728,6 +799,8 @@ unp_externalize(struct mbuf *rights, soc } } + KERNEL_UNLOCK(); + fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK); restart: @@ -825,6 +898,8 @@ unp_internalize(struct mbuf *control, st int i, error; int nfds, *ip, fd, neededspace; + rw_assert_wrlock(&unp_lock); + /* * Check for two potential msg_controllen values because * IETF stuck their nose in a place it does not belong. @@ -923,7 +998,8 @@ fail: return (error); } -int unp_defer, unp_gcing; +int unp_defer; /* [U] number of deferred fp to close by the GC task */ +int unp_gcing; /* [U] GC task currently running */ void unp_gc(void *arg __unused) @@ -934,8 +1010,10 @@ unp_gc(void *arg __unused) struct unpcb *unp; int nunref, i; + rw_enter_write(&unp_lock); + if (unp_gcing) - return; + goto unlock; unp_gcing = 1; /* close any fds on the deferred list */ @@ -950,7 +1028,9 @@ unp_gc(void *arg __unused) if ((unp = fptounp(fp)) != NULL) unp->unp_msgcount--; unp_rights--; + rw_exit_write(&unp_lock); (void) closef(fp, NULL); + rw_enter_write(&unp_lock); } free(defer, M_TEMP, sizeof(*defer) + sizeof(struct fdpass) * defer->ud_n); @@ -1021,6 +1101,8 @@ unp_gc(void *arg __unused) } } unp_gcing = 0; +unlock: + rw_exit_write(&unp_lock); } void @@ -1066,6 +1148,8 @@ unp_mark(struct fdpass *rp, int nfds) struct unpcb *unp; int i; + rw_assert_wrlock(&unp_lock); + for (i = 0; i < nfds; i++) { if (rp[i].fp == NULL) continue; @@ -1087,6 +1171,8 @@ void unp_discard(struct fdpass *rp, int nfds) { struct unp_deferral *defer; + + rw_assert_wrlock(&unp_lock); /* copy the file pointers to a deferral structure */ defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK); Index: sys/sys/unpcb.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/sys/unpcb.h,v retrieving revision 1.17 diff -u -p -u -p -r1.17 unpcb.h --- sys/sys/unpcb.h 15 Jul 2019 12:28:06 -0000 1.17 +++ sys/sys/unpcb.h 15 Jan 2021 19:04:21 -0000 @@ -56,22 +56,27 @@ * Stream sockets keep copies of receive sockbuf sb_cc and sb_mbcnt * so that changes in the sockbuf may be computed to modify * back pressure on the sender accordingly. + * + * Locks used to protect struct members: + * I immutable after creation + * U unp_lock */ + struct unpcb { - struct socket *unp_socket; /* pointer back to socket */ - struct vnode *unp_vnode; /* if associated with file */ - struct file *unp_file; /* backpointer for unp_gc() */ - struct unpcb *unp_conn; /* control block of connected socket */ - ino_t unp_ino; /* fake inode number */ - SLIST_HEAD(,unpcb) unp_refs; /* referencing socket linked list */ - SLIST_ENTRY(unpcb) unp_nextref; /* link in unp_refs list */ - struct mbuf *unp_addr; /* bound address of socket */ - long unp_msgcount; /* references from socket rcv buf */ - int unp_flags; /* this unpcb contains peer eids */ - struct sockpeercred unp_connid;/* id of peer process */ - struct timespec unp_ctime; /* holds creation time */ - LIST_ENTRY(unpcb) unp_link; /* link in per-AF list of sockets */ + struct socket *unp_socket; /* [I] pointer back to socket */ + struct vnode *unp_vnode; /* [U] if associated with file */ + struct file *unp_file; /* [U] backpointer for unp_gc() */ + struct unpcb *unp_conn; /* [U] control block of connected socket */ + ino_t unp_ino; /* [U] fake inode number */ + SLIST_HEAD(,unpcb) unp_refs; /* [U] referencing socket linked list */ + SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */ + struct mbuf *unp_addr; /* [U] bound address of socket */ + long unp_msgcount; /* [U] references from socket rcv buf */ + int unp_flags; /* [U] this unpcb contains peer eids */ + struct sockpeercred unp_connid;/* [U] id of peer process */ + struct timespec unp_ctime; /* [I] holds creation time */ + LIST_ENTRY(unpcb) unp_link; /* [U] link in per-AF list of sockets */ }; /* @@ -82,6 +87,8 @@ struct unpcb { #define UNP_GCMARK 0x04 /* mark during unp_gc() */ #define UNP_GCDEFER 0x08 /* ref'd, but not marked in this pass */ #define UNP_GCDEAD 0x10 /* unref'd in this pass */ +#define UNP_BINDING 0x20 /* unp is binding now */ +#define UNP_CONNECTING 0x40 /* unp is connecting now */ #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb))