Index: sys/kern/kern_event.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/kern_event.c,v retrieving revision 1.167 diff -u -p -u -p -r1.167 kern_event.c --- sys/kern/kern_event.c 16 Jun 2021 14:26:30 -0000 1.167 +++ sys/kern/kern_event.c 21 Jul 2021 23:06:33 -0000 @@ -1884,6 +1884,9 @@ knote_dequeue(struct knote *kn) void knote_modify(const struct kevent *kev, struct knote *kn) { + if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) + KERNEL_ASSERT_LOCKED(); + kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kn->kn_udata = kev->udata; @@ -1897,6 +1900,9 @@ knote_modify(const struct kevent *kev, s void knote_submit(struct knote *kn, struct kevent *kev) { + if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) + KERNEL_ASSERT_LOCKED(); + if (kev != NULL) { *kev = kn->kn_kevent; if (kn->kn_flags & EV_CLEAR) { Index: sys/kern/uipc_socket.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.263 diff -u -p -u -p -r1.263 uipc_socket.c --- sys/kern/uipc_socket.c 28 May 2021 16:24:53 -0000 1.263 +++ sys/kern/uipc_socket.c 21 Jul 2021 23:06:33 -0000 @@ -84,7 +84,7 @@ int filt_solistenprocess(struct knote *k int filt_solisten_common(struct knote *kn, struct socket *so); const struct filterops solisten_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_solisten, @@ -93,7 +93,7 @@ const struct filterops solisten_filtops }; const struct filterops soread_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, @@ -102,7 +102,7 @@ const struct filterops soread_filtops = }; const struct filterops sowrite_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = 
filt_sowdetach, .f_event = filt_sowrite, @@ -111,7 +111,7 @@ const struct filterops sowrite_filtops = }; const struct filterops soexcept_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, @@ -181,6 +181,9 @@ socreate(int dom, struct socket **aso, i so->so_egid = p->p_ucred->cr_gid; so->so_cpid = p->p_p->ps_pid; so->so_proto = prp; + mtx_init(&so->so_mtx, IPL_MPFLOOR); + klist_init_mutex(&so->so_snd.sb_sel.si_note, &so->so_mtx); + klist_init_mutex(&so->so_rcv.sb_sel.si_note, &so->so_mtx); so->so_snd.sb_timeo_nsecs = INFSLP; so->so_rcv.sb_timeo_nsecs = INFSLP; @@ -276,7 +279,9 @@ sofree(struct socket *so, int s) } } #endif /* SOCKET_SPLICE */ + mtx_enter(&so->so_mtx); sbrelease(so, &so->so_snd); + mtx_leave(&so->so_mtx); sorflush(so); sounlock(so, s); #ifdef SOCKET_SPLICE @@ -860,7 +865,7 @@ dontblock: *paddr = m_copym(m, 0, m->m_len, M_NOWAIT); m = m->m_next; } else { - sbfree(&so->so_rcv, m); + sbfree(so, &so->so_rcv, m); if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; @@ -884,7 +889,7 @@ dontblock: *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); m = m->m_next; } else { - sbfree(&so->so_rcv, m); + sbfree(so, &so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_nextpkt = m->m_next = NULL; cm = m; @@ -984,7 +989,7 @@ dontblock: orig_resid = 0; } else { nextrecord = m->m_nextpkt; - sbfree(&so->so_rcv, m); + sbfree(so, &so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; @@ -1019,8 +1024,10 @@ dontblock: *mp = m_copym(m, 0, len, M_WAIT); m->m_data += len; m->m_len -= len; + mtx_enter(&so->so_mtx); so->so_rcv.sb_cc -= len; so->so_rcv.sb_datacc -= len; + mtx_leave(&so->so_mtx); } } if (so->so_oobmark) { @@ -1065,7 +1072,7 @@ dontblock: if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) - (void) sbdroprecord(&so->so_rcv); + (void) sbdroprecord(so, &so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { @@ -1452,7 +1459,7 
@@ somove(struct socket *so, int wait) while (m && m->m_type == MT_CONTROL) m = m->m_next; if (m == NULL) { - sbdroprecord(&so->so_rcv); + sbdroprecord(so, &so->so_rcv); if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb) (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, NULL); @@ -1492,7 +1499,7 @@ somove(struct socket *so, int wait) * that the whole first record can be processed. */ m = so->so_rcv.sb_mb; - sbfree(&so->so_rcv, m); + sbfree(so, &so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); sbsync(&so->so_rcv, nextrecord); } @@ -1502,7 +1509,7 @@ somove(struct socket *so, int wait) */ m = so->so_rcv.sb_mb; while (m && m->m_type == MT_CONTROL) { - sbfree(&so->so_rcv, m); + sbfree(so, &so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sbsync(&so->so_rcv, nextrecord); @@ -1537,11 +1544,13 @@ somove(struct socket *so, int wait) } so->so_rcv.sb_mb->m_data += size; so->so_rcv.sb_mb->m_len -= size; + mtx_enter(&so->so_mtx); so->so_rcv.sb_cc -= size; so->so_rcv.sb_datacc -= size; + mtx_leave(&so->so_mtx); } else { *mp = so->so_rcv.sb_mb; - sbfree(&so->so_rcv, *mp); + sbfree(so, &so->so_rcv, *mp); so->so_rcv.sb_mb = (*mp)->m_next; sbsync(&so->so_rcv, nextrecord); } @@ -1550,7 +1559,7 @@ somove(struct socket *so, int wait) SBLASTRECORDCHK(&so->so_rcv, "somove 3"); SBLASTMBUFCHK(&so->so_rcv, "somove 3"); - SBCHECK(&so->so_rcv); + SBCHECK(so, &so->so_rcv); if (m == NULL) goto release; m->m_nextpkt = NULL; @@ -1777,30 +1786,40 @@ sosetopt(struct socket *so, int level, i case SO_SNDBUF: if (so->so_state & SS_CANTSENDMORE) return (EINVAL); + mtx_enter(&so->so_mtx); if (sbcheckreserve(cnt, so->so_snd.sb_wat) || sbreserve(so, &so->so_snd, cnt)) - return (ENOBUFS); - so->so_snd.sb_wat = cnt; + error = ENOBUFS; + if (error == 0) + so->so_snd.sb_wat = cnt; + mtx_leave(&so->so_mtx); break; case SO_RCVBUF: if (so->so_state & SS_CANTRCVMORE) return (EINVAL); + mtx_enter(&so->so_mtx); if (sbcheckreserve(cnt, so->so_rcv.sb_wat) || sbreserve(so, &so->so_rcv, 
cnt)) - return (ENOBUFS); - so->so_rcv.sb_wat = cnt; + error = ENOBUFS; + if (error == 0) + so->so_rcv.sb_wat = cnt; + mtx_leave(&so->so_mtx); break; case SO_SNDLOWAT: + mtx_enter(&so->so_mtx); so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : cnt; + mtx_leave(&so->so_mtx); break; case SO_RCVLOWAT: + mtx_enter(&so->so_mtx); so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : cnt; + mtx_leave(&so->so_mtx); break; } break; @@ -2028,7 +2047,12 @@ void sohasoutofband(struct socket *so) { pgsigio(&so->so_sigio, SIGURG, 0); + /* XXX KERNEL_LOCK() needed because of legacy poll/select */ + KERNEL_LOCK(); + mtx_enter(&so->so_mtx); selwakeup(&so->so_rcv.sb_sel); + mtx_leave(&so->so_mtx); + KERNEL_UNLOCK(); } int @@ -2037,8 +2061,6 @@ soo_kqfilter(struct file *fp, struct kno struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; - KERNEL_ASSERT_LOCKED(); - switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) @@ -2059,7 +2081,7 @@ soo_kqfilter(struct file *fp, struct kno return (EINVAL); } - klist_insert_locked(&sb->sb_sel.si_note, kn); + klist_insert(&sb->sb_sel.si_note, kn); return (0); } @@ -2069,17 +2091,16 @@ filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - KERNEL_ASSERT_LOCKED(); - - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn); + klist_remove(&so->so_rcv.sb_sel.si_note, kn); } int filt_soread_common(struct knote *kn, struct socket *so) { + u_int sostate = so->so_state; int rv = 0; - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); kn->kn_data = so->so_rcv.sb_cc; #ifdef SOCKET_SPLICE @@ -2088,15 +2109,17 @@ filt_soread_common(struct knote *kn, str } else #endif /* SOCKET_SPLICE */ if (kn->kn_sfflags & NOTE_OOB) { - if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { + u_long oobmark = so->so_oobmark; + + if (oobmark || (sostate & SS_RCVATMARK)) { kn->kn_fflags |= NOTE_OOB; - kn->kn_data -= so->so_oobmark; + kn->kn_data -= oobmark; rv = 1; } - } else if
(so->so_state & SS_CANTRCVMORE) { + } else if (sostate & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { - if (so->so_state & SS_ISDISCONNECTED) + if (sostate & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } kn->kn_fflags = so->so_error; @@ -2124,12 +2147,12 @@ int filt_soreadmodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); knote_modify(kev, kn); rv = filt_soread_common(kn, so); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -2138,16 +2161,16 @@ int filt_soreadprocess(struct knote *kn, struct kevent *kev) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) rv = 1; else rv = filt_soread_common(kn, so); if (rv != 0) knote_submit(kn, kev); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -2157,30 +2180,29 @@ filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - KERNEL_ASSERT_LOCKED(); - - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn); + klist_remove(&so->so_snd.sb_sel.si_note, kn); } int filt_sowrite_common(struct knote *kn, struct socket *so) { + u_int sostate = so->so_state; int rv; - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); - kn->kn_data = sbspace(so, &so->so_snd); - if (so->so_state & SS_CANTSENDMORE) { + kn->kn_data = sbspace_locked(so, &so->so_snd); + if (sostate & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { - if (so->so_state & SS_ISDISCONNECTED) + if (sostate & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } kn->kn_fflags = so->so_error; rv = 1; } else if (so->so_error) { /* temporary udp error */ rv = 1; - } else if (((so->so_state & SS_ISCONNECTED) == 0) && + } else if (((sostate & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { rv = 0; } else if (kn->kn_sfflags & NOTE_LOWAT) { @@ -2204,12 +2226,12 @@ int 
filt_sowritemodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); knote_modify(kev, kn); rv = filt_sowrite_common(kn, so); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -2218,16 +2240,16 @@ int filt_sowriteprocess(struct knote *kn, struct kevent *kev) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) rv = 1; else rv = filt_sowrite_common(kn, so); if (rv != 0) knote_submit(kn, kev); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -2235,8 +2257,6 @@ filt_sowriteprocess(struct knote *kn, st int filt_solisten_common(struct knote *kn, struct socket *so) { - soassertlocked(so); - kn->kn_data = so->so_qlen; return (kn->kn_data != 0); @@ -2254,12 +2274,12 @@ int filt_solistenmodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); knote_modify(kev, kn); rv = filt_solisten_common(kn, so); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -2268,16 +2288,16 @@ int filt_solistenprocess(struct knote *kn, struct kevent *kev) { struct socket *so = kn->kn_fp->f_data; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) rv = 1; else rv = filt_solisten_common(kn, so); if (rv != 0) knote_submit(kn, kev); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.111 diff -u -p -u -p -r1.111 uipc_socket2.c --- sys/kern/uipc_socket2.c 7 Jun 2021 09:10:32 -0000 1.111 +++ sys/kern/uipc_socket2.c 21 Jul 2021 23:06:33 -0000 @@ -34,7 +34,6 @@ #include #include -#include #include #include #include @@ -163,6 +162,9 
@@ sonewconn(struct socket *head, int conns if (so == NULL) return (NULL); rw_init(&so->so_lock, "solock"); + mtx_init(&so->so_mtx, IPL_MPFLOOR); + klist_init_mutex(&so->so_snd.sb_sel.si_note, &so->so_mtx); + klist_init_mutex(&so->so_rcv.sb_sel.si_note, &so->so_mtx); so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; @@ -423,7 +425,12 @@ sowakeup(struct socket *so, struct sockb } if (sb->sb_flags & SB_ASYNC) pgsigio(&so->so_sigio, SIGIO, 0); + /* XXX KERNEL_LOCK() needed because of legacy poll/select */ + KERNEL_LOCK(); + mtx_enter(&so->so_mtx); selwakeup(&sb->sb_sel); + mtx_leave(&so->so_mtx); + KERNEL_UNLOCK(); } /* @@ -463,6 +470,7 @@ soreserve(struct socket *so, u_long sndc { soassertlocked(so); + mtx_enter(&so->so_mtx); if (sbreserve(so, &so->so_snd, sndcc)) goto bad; if (sbreserve(so, &so->so_rcv, rcvcc)) @@ -475,10 +483,12 @@ soreserve(struct socket *so, u_long sndc so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + mtx_leave(&so->so_mtx); return (0); bad2: sbrelease(so, &so->so_snd); bad: + mtx_leave(&so->so_mtx); return (ENOBUFS); } @@ -492,6 +502,7 @@ sbreserve(struct socket *so, struct sock { KASSERT(sb == &so->so_rcv || sb == &so->so_snd); soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); if (cc == 0 || cc > sb_max) return (1); @@ -533,6 +544,7 @@ sbchecklowmem(void) void sbrelease(struct socket *so, struct sockbuf *sb) { + MUTEX_ASSERT_LOCKED(&so->so_mtx); sbflush(so, sb); sb->sb_hiwat = sb->sb_mbmax = 0; @@ -654,7 +666,7 @@ sbappend(struct socket *so, struct sockb */ sb->sb_lastrecord = m; } - sbcompress(sb, m, n); + sbcompress(so, sb, m, n); SBLASTRECORDCHK(sb, "sbappend 2"); } @@ -673,7 +685,7 @@ sbappendstream(struct socket *so, struct SBLASTMBUFCHK(sb, __func__); - sbcompress(sb, m, sb->sb_mbtail); + sbcompress(so, sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb, __func__); @@ 
-681,11 +693,12 @@ sbappendstream(struct socket *so, struct #ifdef SOCKBUF_DEBUG void -sbcheck(struct sockbuf *sb) +sbcheck(struct socket *so, struct sockbuf *sb) { struct mbuf *m, *n; u_long len = 0, mbcnt = 0; + mtx_enter(&so->so_mtx); for (m = sb->sb_mb; m; m = m->m_nextpkt) { for (n = m; n; n = n->m_next) { len += n->m_len; @@ -701,6 +714,7 @@ sbcheck(struct sockbuf *sb) mbcnt, sb->sb_mbcnt); panic("sbcheck"); } + mtx_leave(&so->so_mtx); } #endif @@ -723,7 +737,7 @@ sbappendrecord(struct socket *so, struct * Put the first mbuf on the queue. * Note this permits zero length records. */ - sballoc(sb, m0); + sballoc(so, sb, m0); SBLASTRECORDCHK(sb, "sbappendrecord 1"); SBLINKRECORD(sb, m0); m = m0->m_next; @@ -732,60 +746,11 @@ sbappendrecord(struct socket *so, struct m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } - sbcompress(sb, m, m0); + sbcompress(so, sb, m, m0); SBLASTRECORDCHK(sb, "sbappendrecord 2"); } /* - * As above except that OOB data - * is inserted at the beginning of the sockbuf, - * but after any other OOB data. - */ -void -sbinsertoob(struct sockbuf *sb, struct mbuf *m0) -{ - struct mbuf *m, **mp; - - if (m0 == NULL) - return; - - SBLASTRECORDCHK(sb, "sbinsertoob 1"); - - for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { - again: - switch (m->m_type) { - - case MT_OOBDATA: - continue; /* WANT next train */ - - case MT_CONTROL: - if ((m = m->m_next) != NULL) - goto again; /* inspect THIS train further */ - } - break; - } - /* - * Put the first mbuf on the queue. - * Note this permits zero length records. 
- */ - sballoc(sb, m0); - m0->m_nextpkt = *mp; - if (*mp == NULL) { - /* m0 is actually the new tail */ - sb->sb_lastrecord = m0; - } - *mp = m0; - m = m0->m_next; - m0->m_next = NULL; - if (m && (m0->m_flags & M_EOR)) { - m0->m_flags &= ~M_EOR; - m->m_flags |= M_EOR; - } - sbcompress(sb, m, m0); - SBLASTRECORDCHK(sb, "sbinsertoob 2"); -} - -/* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. @@ -827,8 +792,8 @@ sbappendaddr(struct socket *so, struct s SBLASTRECORDCHK(sb, "sbappendaddr 1"); for (n = m; n->m_next != NULL; n = n->m_next) - sballoc(sb, n); - sballoc(sb, n); + sballoc(so, sb, n); + sballoc(so, sb, n); nlast = n; SBLINKRECORD(sb, m); @@ -864,8 +829,8 @@ sbappendcontrol(struct socket *so, struc SBLASTRECORDCHK(sb, "sbappendcontrol 1"); for (m = control; m->m_next != NULL; m = m->m_next) - sballoc(sb, m); - sballoc(sb, m); + sballoc(so, sb, m); + sballoc(so, sb, m); mlast = m; SBLINKRECORD(sb, control); @@ -883,7 +848,8 @@ sbappendcontrol(struct socket *so, struc * is null, the buffer is presumed empty. 
*/ void -sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) +sbcompress(struct socket *so, struct sockbuf *sb, struct mbuf *m, + struct mbuf *n) { int eor = 0; struct mbuf *o; @@ -908,9 +874,11 @@ sbcompress(struct sockbuf *sb, struct mb memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t), m->m_len); n->m_len += m->m_len; + mtx_enter(&so->so_mtx); sb->sb_cc += m->m_len; if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME) sb->sb_datacc += m->m_len; + mtx_leave(&so->so_mtx); m = m_free(m); continue; } @@ -919,7 +887,7 @@ sbcompress(struct sockbuf *sb, struct mb else sb->sb_mb = m; sb->sb_mbtail = m; - sballoc(sb, m); + sballoc(so, sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; @@ -943,6 +911,7 @@ sbflush(struct socket *so, struct sockbu { KASSERT(sb == &so->so_rcv || sb == &so->so_snd); KASSERT((sb->sb_flags & SB_LOCK) == 0); + MUTEX_ASSERT_LOCKED(&so->so_mtx); while (sb->sb_mbcnt) sbdrop(so, sb, (int)sb->sb_cc); @@ -965,6 +934,7 @@ sbdrop(struct socket *so, struct sockbuf KASSERT(sb == &so->so_rcv || sb == &so->so_snd); soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); next = (m = sb->sb_mb) ? m->m_nextpkt : NULL; while (len > 0) { @@ -984,12 +954,12 @@ sbdrop(struct socket *so, struct sockbuf break; } len -= m->m_len; - sbfree(sb, m); + sbfree_locked(so, sb, m); mn = m_free(m); m = mn; } while (m && m->m_len == 0) { - sbfree(sb, m); + sbfree_locked(so, sb, m); mn = m_free(m); m = mn; } @@ -1016,7 +986,7 @@ sbdrop(struct socket *so, struct sockbuf * and move the next record to the front. 
*/ void -sbdroprecord(struct sockbuf *sb) +sbdroprecord(struct socket *so, struct sockbuf *sb) { struct mbuf *m, *mn; @@ -1024,7 +994,7 @@ sbdroprecord(struct sockbuf *sb) if (m) { sb->sb_mb = m->m_nextpkt; do { - sbfree(sb, m); + sbfree(so, sb, m); mn = m_free(m); } while ((m = mn) != NULL); } Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.193 diff -u -p -u -p -r1.193 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 2 Jul 2021 12:17:41 -0000 1.193 +++ sys/kern/uipc_syscalls.c 21 Jul 2021 23:06:33 -0000 @@ -308,7 +308,9 @@ doaccept(struct proc *p, int sock, struc : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); /* connection has been removed from the listen queue */ + mtx_enter(&head->so_mtx); KNOTE(&head->so_rcv.sb_sel.si_note, 0); + mtx_leave(&head->so_mtx); fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD | FWRITE | nflag; Index: sys/kern/uipc_usrreq.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_usrreq.c,v retrieving revision 1.148 diff -u -p -u -p -r1.148 uipc_usrreq.c --- sys/kern/uipc_usrreq.c 25 May 2021 22:45:09 -0000 1.148 +++ sys/kern/uipc_usrreq.c 21 Jul 2021 23:06:33 -0000 @@ -208,8 +208,10 @@ uipc_usrreq(struct socket *so, int req, * Adjust backpressure on sender * and wakeup any waiting to write. 
*/ + mtx_enter(&so2->so_mtx); so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_cc = so->so_rcv.sb_cc; + mtx_leave(&so2->so_mtx); sowwakeup(so2); break; @@ -284,8 +286,10 @@ uipc_usrreq(struct socket *so, int req, sbappendrecord(so2, &so2->so_rcv, m); else sbappend(so2, &so2->so_rcv, m); + mtx_enter(&so->so_mtx); so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt; so->so_snd.sb_cc = so2->so_rcv.sb_cc; + mtx_leave(&so->so_mtx); if (so2->so_rcv.sb_cc > 0) sorwakeup(so2); m = NULL; @@ -736,12 +740,16 @@ unp_disconnect(struct unpcb *unp) case SOCK_STREAM: case SOCK_SEQPACKET: + mtx_enter(&unp->unp_socket->so_mtx); unp->unp_socket->so_snd.sb_mbcnt = 0; unp->unp_socket->so_snd.sb_cc = 0; + mtx_leave(&unp->unp_socket->so_mtx); soisdisconnected(unp->unp_socket); unp2->unp_conn = NULL; + mtx_enter(&unp2->unp_socket->so_mtx); unp2->unp_socket->so_snd.sb_mbcnt = 0; unp2->unp_socket->so_snd.sb_cc = 0; + mtx_leave(&unp2->unp_socket->so_mtx); soisdisconnected(unp2->unp_socket); break; } Index: sys/miscfs/fifofs/fifo_vnops.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v retrieving revision 1.80 diff -u -p -u -p -r1.80 fifo_vnops.c --- sys/miscfs/fifofs/fifo_vnops.c 13 Jul 2021 07:37:50 -0000 1.80 +++ sys/miscfs/fifofs/fifo_vnops.c 21 Jul 2021 23:06:33 -0000 @@ -114,7 +114,7 @@ int filt_fifowriteprocess(struct knote * int filt_fifowrite_common(struct knote *kn, struct socket *so); const struct filterops fiforead_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_fifordetach, .f_event = filt_fiforead, @@ -123,7 +123,7 @@ const struct filterops fiforead_filtops }; const struct filterops fifowrite_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_fifowdetach, .f_event = filt_fifowrite, @@ -542,7 +542,7 @@ fifo_kqfilter(void *v) ap->a_kn->kn_hook = so; - 
klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn); + klist_insert(&sb->sb_sel.si_note, ap->a_kn); return (0); } @@ -552,7 +552,7 @@ filt_fifordetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_hook; - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn); + klist_remove(&so->so_rcv.sb_sel.si_note, kn); } int @@ -560,7 +560,7 @@ filt_fiforead_common(struct knote *kn, s { int rv; - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); kn->kn_data = so->so_rcv.sb_cc; if (so->so_state & SS_CANTRCVMORE) { @@ -590,12 +590,12 @@ int filt_fiforeadmodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_hook; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); knote_modify(kev, kn); rv = filt_fiforead_common(kn, so); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -604,16 +604,16 @@ int filt_fiforeadprocess(struct knote *kn, struct kevent *kev) { struct socket *so = kn->kn_hook; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) rv = 1; else rv = filt_fiforead_common(kn, so); if (rv != 0) knote_submit(kn, kev); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -623,7 +623,7 @@ filt_fifowdetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_hook; - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn); + klist_remove(&so->so_snd.sb_sel.si_note, kn); } int @@ -631,7 +631,7 @@ filt_fifowrite_common(struct knote *kn, { int rv; - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); - kn->kn_data = sbspace(so, &so->so_snd); + kn->kn_data = sbspace_locked(so, &so->so_snd); if (so->so_state & SS_CANTSENDMORE) { @@ -657,12 +657,12 @@ int filt_fifowritemodify(struct kevent *kev, struct knote *kn) { struct socket *so = kn->kn_hook; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); knote_modify(kev, kn); rv = filt_fifowrite_common(kn, so); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } @@ -671,16 +671,16 @@ int filt_fifowriteprocess(struct knote
*kn, struct kevent *kev) { struct socket *so = kn->kn_hook; - int rv, s; + int rv; - s = solock(so); + mtx_enter(&so->so_mtx); if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) rv = 1; else rv = filt_fifowrite_common(kn, so); if (rv != 0) knote_submit(kn, kev); - sounlock(so, s); + mtx_leave(&so->so_mtx); return (rv); } Index: sys/netinet/tcp_input.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.368 diff -u -p -u -p -r1.368 tcp_input.c --- sys/netinet/tcp_input.c 16 Apr 2021 12:08:25 -0000 1.368 +++ sys/netinet/tcp_input.c 21 Jul 2021 23:06:33 -0000 @@ -946,7 +946,9 @@ findpcb: tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); ND6_HINT(tp); + mtx_enter(&so->so_mtx); sbdrop(so, &so->so_snd, acked); + mtx_leave(&so->so_mtx); /* * If we had a pending ICMP message that @@ -1714,6 +1716,7 @@ trimthenstep6: TCP_MAXWIN << tp->snd_scale); } ND6_HINT(tp); + mtx_enter(&so->so_mtx); if (acked > so->so_snd.sb_cc) { if (tp->snd_wnd > so->so_snd.sb_cc) tp->snd_wnd -= so->so_snd.sb_cc; @@ -1729,6 +1732,7 @@ trimthenstep6: tp->snd_wnd = 0; ourfinisacked = 0; } + mtx_leave(&so->so_mtx); tcp_update_sndspace(tp); if (sb_notify(so, &so->so_snd)) { @@ -2967,7 +2971,9 @@ tcp_mss_update(struct tcpcb *tp) bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; + mtx_enter(&so->so_mtx); (void)sbreserve(so, &so->so_snd, bufsize); + mtx_leave(&so->so_mtx); } bufsize = so->so_rcv.sb_hiwat; @@ -2975,7 +2981,9 @@ tcp_mss_update(struct tcpcb *tp) bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; + mtx_enter(&so->so_mtx); (void)sbreserve(so, &so->so_rcv, bufsize); + mtx_leave(&so->so_mtx); } } Index: sys/netinet/tcp_usrreq.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.181 diff -u -p -u -p -r1.181 tcp_usrreq.c --- sys/netinet/tcp_usrreq.c 30 Apr 2021 
13:52:48 -0000 1.181 +++ sys/netinet/tcp_usrreq.c 21 Jul 2021 23:06:33 -0000 @@ -688,7 +688,9 @@ tcp_disconnect(struct tcpcb *tp) tp = tcp_drop(tp, 0); else { soisdisconnecting(so); + mtx_enter(&so->so_mtx); sbflush(so, &so->so_rcv); + mtx_leave(&so->so_mtx); tp = tcp_usrclosed(tp); if (tp) (void) tcp_output(tp); @@ -1115,6 +1117,7 @@ tcp_update_sndspace(struct tcpcb *tp) struct socket *so = tp->t_inpcb->inp_socket; u_long nmax = so->so_snd.sb_hiwat; + mtx_enter(&so->so_mtx); if (sbchecklowmem()) { /* low on memory try to get rid of some */ if (tcp_sendspace < nmax) @@ -1128,7 +1131,7 @@ tcp_update_sndspace(struct tcpcb *tp) tp->snd_una); /* a writable socket must be preserved because of poll(2) semantics */ - if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) { + if (sbspace_locked(so, &so->so_snd) >= so->so_snd.sb_lowat) { if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat) nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat; /* keep in sync with sbreserve() calculation */ @@ -1141,6 +1144,7 @@ tcp_update_sndspace(struct tcpcb *tp) if (nmax != so->so_snd.sb_hiwat) sbreserve(so, &so->so_snd, nmax); + mtx_leave(&so->so_mtx); } /* @@ -1179,5 +1183,7 @@ tcp_update_rcvspace(struct tcpcb *tp) /* round to MSS boundary */ nmax = roundup(nmax, tp->t_maxseg); + mtx_enter(&so->so_mtx); sbreserve(so, &so->so_rcv, nmax); + mtx_leave(&so->so_mtx); } Index: sys/sys/socketvar.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/sys/socketvar.h,v retrieving revision 1.98 diff -u -p -u -p -r1.98 socketvar.h --- sys/sys/socketvar.h 7 Jun 2021 09:10:32 -0000 1.98 +++ sys/sys/socketvar.h 21 Jul 2021 23:06:33 -0000 @@ -33,10 +33,12 @@ */ #include /* for struct selinfo */ +#include /* panicstr for MUTEX_ASSERT */ #include #include /* for struct sigio_ref */ #include #include +#include #include #ifndef _SOCKLEN_T_DEFINED_ @@ -51,10 +53,15 @@ TAILQ_HEAD(soqhead, socket); * Contains send and receive buffer queues, * handle on protocol and 
pointer to protocol * private data and error information. + * + * Locks used to protect struct members in this file: + * s this socket solock + * m this socket `so_mtx' */ struct socket { const struct protosw *so_proto; /* protocol handle */ struct rwlock so_lock; /* this socket lock */ + struct mutex so_mtx; void *so_pcb; /* protocol control block */ u_int so_state; /* internal state flags SS_*, below */ short so_type; /* generic type, see socket.h */ @@ -101,13 +108,13 @@ struct socket { struct sockbuf { /* The following fields are all zeroed on flush. */ #define sb_startzero sb_cc - u_long sb_cc; /* actual chars in buffer */ - u_long sb_datacc; /* data only chars in buffer */ - u_long sb_hiwat; /* max actual char count */ - u_long sb_wat; /* default watermark */ - u_long sb_mbcnt; /* chars of mbufs used */ - u_long sb_mbmax; /* max chars of mbufs to use */ - long sb_lowat; /* low water mark */ + u_long sb_cc; /* [s|m] actual chars in buffer */ + u_long sb_datacc; /* [s|m] data only chars in buffer */ + u_long sb_hiwat; /* [s|m] max actual char count */ + u_long sb_wat; /* [s|m] default watermark */ + u_long sb_mbcnt; /* [s|m] chars of mbufs used */ + u_long sb_mbmax; /* [s|m] max chars of mbufs to use */ + long sb_lowat; /* [s|m] low water mark */ struct mbuf *sb_mb; /* the mbuf chain */ struct mbuf *sb_mbtail; /* the last mbuf in the chain */ struct mbuf *sb_lastrecord;/* first mbuf of last record in @@ -189,13 +196,27 @@ sb_notify(struct socket *so, struct sock * overflow and return 0. 
*/ static inline long -sbspace(struct socket *so, struct sockbuf *sb) +sbspace_locked(struct socket *so, struct sockbuf *sb) { KASSERT(sb == &so->so_rcv || sb == &so->so_snd); - soassertlocked(so); + MUTEX_ASSERT_LOCKED(&so->so_mtx); + return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); } +static inline long +sbspace(struct socket *so, struct sockbuf *sb) +{ + long space; + + mtx_enter(&so->so_mtx); + space = sbspace_locked(so, sb); + mtx_leave(&so->so_mtx); + + return space; +} + + /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) @@ -223,17 +244,20 @@ soreadable(struct socket *so) ((so)->so_state & SS_CANTSENDMORE) || (so)->so_error) /* adjust counters in sb reflecting allocation of m */ -#define sballoc(sb, m) do { \ +#define sballoc(so, sb, m) do { \ + mtx_enter(&(so)->so_mtx); \ (sb)->sb_cc += (m)->m_len; \ if ((m)->m_type != MT_CONTROL && (m)->m_type != MT_SONAME) \ (sb)->sb_datacc += (m)->m_len; \ (sb)->sb_mbcnt += MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ + mtx_leave(&(so)->so_mtx); \ } while (/* CONSTCOND */ 0) /* adjust counters in sb reflecting freeing of m */ -#define sbfree(sb, m) do { \ +#define sbfree_locked(so, sb, m) do { \ + MUTEX_ASSERT_LOCKED(&(so)->so_mtx); \ (sb)->sb_cc -= (m)->m_len; \ if ((m)->m_type != MT_CONTROL && (m)->m_type != MT_SONAME) \ (sb)->sb_datacc -= (m)->m_len; \ @@ -242,6 +266,12 @@ soreadable(struct socket *so) (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ } while (/* CONSTCOND */ 0) +#define sbfree(so, sb, m) do { \ + mtx_enter(&(so)->so_mtx); \ + sbfree_locked((so), (sb), (m)); \ + mtx_leave(&(so)->so_mtx); \ +} while (/* CONSTCOND */ 0) + /* * Set lock on sockbuf sb; sleep if lock is already held. * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
@@ -287,13 +317,13 @@ int sbappendaddr(struct socket *, struct int sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *, struct mbuf *); void sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *); -void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *); +void sbcompress(struct socket *, struct sockbuf *, struct mbuf *, + struct mbuf *); struct mbuf * sbcreatecontrol(const void *, size_t, int, int); void sbdrop(struct socket *, struct sockbuf *, int); -void sbdroprecord(struct sockbuf *); +void sbdroprecord(struct socket *, struct sockbuf *); void sbflush(struct socket *, struct sockbuf *); -void sbinsertoob(struct sockbuf *, struct mbuf *); void sbrelease(struct socket *, struct sockbuf *); int sbcheckreserve(u_long, u_long); int sbchecklowmem(void); @@ -349,12 +379,12 @@ void sblastrecordchk(struct sockbuf *, c void sblastmbufchk(struct sockbuf *, const char *); #define SBLASTMBUFCHK(sb, where) sblastmbufchk((sb), (where)) -void sbcheck(struct sockbuf *); -#define SBCHECK(sb) sbcheck(sb) +void sbcheck(struct socket *, struct sockbuf *); +#define SBCHECK(so, sb) sbcheck(so, sb) #else #define SBLASTRECORDCHK(sb, where) /* nothing */ #define SBLASTMBUFCHK(sb, where) /* nothing */ -#define SBCHECK(sb) /* nothing */ +#define SBCHECK(so, sb) /* nothing */ #endif /* SOCKBUF_DEBUG */ #endif /* _KERNEL */