Sun Mar 8 09:47:28 2020 UTC
Pull up following revision(s) (requested by mlelstv in ticket #1515):

	sys/kern/sys_select.c: revision 1.42-1.45

PR/54158: Anthony Mallet: poll(2) does not allow polling all possible fds
(hardcoded limit of 1000 + #<open-fds>). Change the limit to the larger of
the open-descriptor resource limit and the above.

Remove the slop code. Suggested by mrg@

Use the maximum resource limit (aka maxfiles, the moral equivalent of
OPEN_MAX), which makes poll(2) align with the POSIX documentation (which
allows EINVAL if nfds > OPEN_MAX). From: Anthony Mallet

Add slop of 1000 and explain why.


(martin)
diff -r1.40 -r1.40.2.1 src/sys/kern/sys_select.c

cvs diff -r1.40 -r1.40.2.1 src/sys/kern/sys_select.c (expand / switch to unified diff)

--- src/sys/kern/sys_select.c 2017/06/01 02:45:13 1.40
+++ src/sys/kern/sys_select.c 2020/03/08 09:47:28 1.40.2.1
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: sys_select.c,v 1.40 2017/06/01 02:45:13 chs Exp $ */ 1/* $NetBSD: sys_select.c,v 1.40.2.1 2020/03/08 09:47:28 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran and Mindaugas Rasiukevicius. 8 * by Andrew Doran and Mindaugas Rasiukevicius.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -74,27 +74,27 @@ @@ -74,27 +74,27 @@
74 * 74 *
75 * The <object-lock> might be a device driver or another subsystem, e.g. 75 * The <object-lock> might be a device driver or another subsystem, e.g.
76 * socket or pipe. This lock is not exported, and thus invisible to this 76 * socket or pipe. This lock is not exported, and thus invisible to this
77 * subsystem. Mainly, synchronisation between selrecord() and selnotify() 77 * subsystem. Mainly, synchronisation between selrecord() and selnotify()
78 * routines depends on this lock, as it will be described in the comments. 78 * routines depends on this lock, as it will be described in the comments.
79 * 79 *
80 * Lock order 80 * Lock order
81 * 81 *
82 * <object-lock> -> 82 * <object-lock> ->
83 * selcluster_t::sc_lock 83 * selcluster_t::sc_lock
84 */ 84 */
85 85
86#include <sys/cdefs.h> 86#include <sys/cdefs.h>
87__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.40 2017/06/01 02:45:13 chs Exp $"); 87__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.40.2.1 2020/03/08 09:47:28 martin Exp $");
88 88
89#include <sys/param.h> 89#include <sys/param.h>
90#include <sys/systm.h> 90#include <sys/systm.h>
91#include <sys/filedesc.h> 91#include <sys/filedesc.h>
92#include <sys/file.h> 92#include <sys/file.h>
93#include <sys/proc.h> 93#include <sys/proc.h>
94#include <sys/socketvar.h> 94#include <sys/socketvar.h>
95#include <sys/signalvar.h> 95#include <sys/signalvar.h>
96#include <sys/uio.h> 96#include <sys/uio.h>
97#include <sys/kernel.h> 97#include <sys/kernel.h>
98#include <sys/lwp.h> 98#include <sys/lwp.h>
99#include <sys/poll.h> 99#include <sys/poll.h>
100#include <sys/mount.h> 100#include <sys/mount.h>
@@ -478,35 +478,48 @@ sys___pollts50(struct lwp *l, const stru @@ -478,35 +478,48 @@ sys___pollts50(struct lwp *l, const stru
478 478
479 return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask); 479 return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask);
480} 480}
481 481
482int 482int
483pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds, 483pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds,
484 struct timespec *ts, sigset_t *mask) 484 struct timespec *ts, sigset_t *mask)
485{ 485{
486 struct pollfd smallfds[32]; 486 struct pollfd smallfds[32];
487 struct pollfd *fds; 487 struct pollfd *fds;
488 int error; 488 int error;
489 size_t ni; 489 size_t ni;
490 490
491 if (nfds > 1000 + curlwp->l_fd->fd_dt->dt_nfiles) { 491 if (nfds > curlwp->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_max + 1000) {
492 /* 492 /*
493 * Either the user passed in a very sparse 'fds' or junk! 493 * Prevent userland from causing over-allocation.
494 * The kmem_alloc() call below would be bad news. 494 * Raising the default limit too high can still cause
495 * We could process the 'fds' array in chunks, but that 495 * a lot of memory to be allocated, but this also means
 496 * that the file descriptor array will also be large.
 497 *
 498 * To reduce the memory requirements here, we could
 499 * process the 'fds' array in chunks, but that
496 * is a lot of code that isn't normally useful. 500 * is a lot of code that isn't normally useful.
497 * (Or just move the copyin/out into pollscan().) 501 * (Or just move the copyin/out into pollscan().)
 502 *
498 * Historically the code silently truncated 'fds' to 503 * Historically the code silently truncated 'fds' to
499 * dt_nfiles entries - but that does cause issues. 504 * dt_nfiles entries - but that does cause issues.
 505 *
 506 * Using the max limit equivalent to sysctl
 507 * kern.maxfiles is the moral equivalent of OPEN_MAX
 508 * as specified by POSIX.
 509 *
 510 * We add a slop of 1000 in case the resource limit was
 511 * changed after opening descriptors or the same descriptor
 512 * was specified more than once.
500 */ 513 */
501 return EINVAL; 514 return EINVAL;
502 } 515 }
503 ni = nfds * sizeof(struct pollfd); 516 ni = nfds * sizeof(struct pollfd);
504 if (ni > sizeof(smallfds)) 517 if (ni > sizeof(smallfds))
505 fds = kmem_alloc(ni, KM_SLEEP); 518 fds = kmem_alloc(ni, KM_SLEEP);
506 else 519 else
507 fds = smallfds; 520 fds = smallfds;
508 521
509 error = copyin(u_fds, fds, ni); 522 error = copyin(u_fds, fds, ni);
510 if (error) 523 if (error)
511 goto fail; 524 goto fail;
512 525