This patch is generated from the commloops-2_6 branch of HEAD in squid
Mon Mar 12 01:18:30 2007 GMT
See http://devel.squid-cache.org/

Index: squid/src/comm.c
diff -u squid/src/comm.c:1.48 squid/src/comm.c:1.41.2.2
--- squid/src/comm.c:1.48	Sat Sep  9 05:50:47 2006
+++ squid/src/comm.c	Sat Sep 30 17:59:26 2006
@@ -1036,8 +1036,7 @@
 void
 comm_init(void)
 {
-    fd_table = xcalloc(Squid_MaxFD, sizeof(fde));
-    /* XXX account fd_table */
+    fd_init();
     /* Keep a few file descriptors free so that we don't run out of FD's
      * after accepting a client but before it opens a socket or a file.
      * Since Squid_MaxFD can be as high as several thousand, don't waste them */
Index: squid/src/comm_epoll.c
diff -u squid/src/comm_epoll.c:1.21 squid/src/comm_epoll.c:1.17.2.6
--- squid/src/comm_epoll.c:1.21	Tue Aug 15 12:52:39 2006
+++ squid/src/comm_epoll.c	Mon Oct 23 04:10:47 2006
@@ -65,8 +65,8 @@
     }
 }
 
-void
-comm_select_init()
+static void
+do_select_init()
 {
     kdpfd = epoll_create(Squid_MaxFD);
     if (kdpfd < 0)
@@ -83,8 +83,8 @@
     debug(5, 1) ("Using epoll for the IO loop\n");
 }
 
-void
-comm_select_shutdown()
+static void
+do_select_shutdown()
 {
     fd_close(kdpfd);
     close(kdpfd);
@@ -153,37 +153,21 @@
     }
 }
 
-int
-comm_select(int msec)
+static int
+do_comm_select(int msec)
 {
-    static time_t last_timeout = 0;
     int i;
     int num;
     int fd;
     struct epoll_event *cevents;
-    double start = current_dtime;
-
-    if (msec > MAX_POLL_TIME)
-	msec = MAX_POLL_TIME;
-
-    debug(5, 3) ("comm_select: timeout %d\n", msec);
 
     if (epoll_fds == 0) {
 	assert(shutting_down);
 	return COMM_SHUTDOWN;
     }
-    /* Check for disk io callbacks */
-    storeDirCallback();
 
-    /* Check timeouts once per second */
-    if (last_timeout != squid_curtime) {
-	last_timeout = squid_curtime;
-	checkTimeouts();
-    }
     statCounter.syscalls.polls++;
     num = epoll_wait(kdpfd, events, MAX_EVENTS, msec);
-    statCounter.select_loops++;
-
     if (num < 0) {
 	getCurrentTime();
 	if (ignoreErrno(errno))
@@ -194,17 +178,13 @@
     }
     statHistCount(&statCounter.select_fds_hist, num);
 
-    if (num > 0) {
-	for (i = 0, cevents = events; i < num; i++, cevents++) {
-	    fd = cevents->data.fd;
-	    comm_call_handlers(fd, cevents->events & ~EPOLLOUT, cevents->events & ~EPOLLIN);
-	}
-	getCurrentTime();
-	statCounter.select_time += (current_dtime - start);
-	return COMM_OK;
-    } else {
-	getCurrentTime();
-	debug(5, 8) ("comm_select: time out: %ld.\n", (long int) squid_curtime);
+    if (num == 0)
 	return COMM_TIMEOUT;
+
+    for (i = 0, cevents = events; i < num; i++, cevents++) {
+	fd = cevents->data.fd;
+	comm_call_handlers(fd, cevents->events & ~EPOLLOUT, cevents->events & ~EPOLLIN);
     }
+
+    return COMM_OK;
 }
Index: squid/src/comm_generic.c
diff -u squid/src/comm_generic.c:1.5 squid/src/comm_generic.c:1.2.2.6
--- squid/src/comm_generic.c:1.5	Tue Jun 27 04:52:53 2006
+++ squid/src/comm_generic.c	Mon Oct 23 04:10:47 2006
@@ -39,6 +39,33 @@
 
 static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
 
+#if DELAY_POOLS
+static int *slow_fds = NULL;
+static int n_slow_fds = 0;
+#endif
+
+static void do_select_init(void);
+
+void
+comm_select_init(void)
+{
+#if DELAY_POOLS
+    slow_fds = xmalloc(sizeof(int) * Squid_MaxFD);
+#endif
+    do_select_init();
+}
+
+static void do_select_shutdown(void);
+
+void
+comm_select_shutdown(void)
+{
+    do_select_shutdown();
+#if DELAY_POOLS
+    safe_free(slow_fds);
+#endif
+}
+
 /* Defer reads from this fd */
 void
 commDeferFD(int fd)
@@ -84,7 +111,104 @@
     return F->defer_check(fd, F->defer_data);
 }
 
+#if DELAY_POOLS
 static void
+commAddSlow(int fd)
+{
+    fde *F = &fd_table[fd];
+    if (F->slow_id)
+	return;
+    F->slow_id = ++n_slow_fds;
+    assert(n_slow_fds < Squid_MaxFD);
+    slow_fds[n_slow_fds] = fd;
+}
+
+/* Take fd off the slow-fd queue; does nothing when fd is not queued */
+void
+commRemoveSlow(int fd)
+{
+    fde *F = &fd_table[fd];
+    int last;			/* fd held in the final used slot */
+    if (!F->slow_id)
+	return;
+    last = slow_fds[n_slow_fds--];
+    if (F->slow_id <= n_slow_fds) {
+	slow_fds[F->slow_id] = last;	/* former last entry fills the vacated slot */
+	fd_table[last].slow_id = F->slow_id;
+    }
+    F->slow_id = 0;
+}
+#endif
+
+static int comm_select_handled;	/* events handled since the counter was last reset */
+static inline int do_comm_select(int msec);
+static inline void comm_call_handlers(int fd, int read_event, int write_event);
+
+/* Run the handlers of one incoming socket; skips closed slots (fd < 0) and backed-off fds */
+static inline void do_call_incoming(int fd)
+{
+    if (fd >= 0 && !fd_table[fd].flags.backoff)
+	comm_call_handlers(fd, -1, -1);
+}
+
+/* Service the HTTP listening sockets and the ICP socket(s) */
+static void do_check_incoming(void)
+{
+    int i;
+    for (i = 0; i < NHttpSockets; i++)
+	do_call_incoming(HttpSockets[i]);
+    do_call_incoming(theInIcpConnection);
+    if (theOutIcpConnection != theInIcpConnection)
+	do_call_incoming(theOutIcpConnection);
+}
+
+/* Count one handled event and, past the threshold, service the incoming sockets */
+static inline void check_incoming(void)
+{
+    if (++comm_select_handled > 30 && comm_select_handled > NHttpSockets << 2) {
+	comm_select_handled = 0;	/* reset first: the handlers called below re-enter here */
+	do_check_incoming();
+    }
+}
+
+#if DELAY_POOLS
+static void
+comm_call_slowfds(void)
+{
+    while (n_slow_fds) {
+	int i = (squid_random() % n_slow_fds) + 1;
+	int fd = slow_fds[i];
+	fde *F = &fd_table[fd];
+	commRemoveSlow(fd);
+	if (F->read_handler) {
+	    PF *hdl = F->read_handler;
+	    void *hdl_data = F->read_data;
+	    debug(5, 8) ("comm_call_handlers(): Calling read handler on fd=%d\n", fd);
+#if SIMPLE_COMM_HANDLER
+	    commUpdateReadHandler(fd, NULL, NULL);
+	    commResumeFD(fd);
+	    hdl(fd, hdl_data);
+#else
+	    /* Optimized version to avoid the fd bouncing in/out of the waited set */
+	    F->read_handler = NULL;
+	    F->read_data = NULL;
+	    F->read_pending = COMM_PENDING_NORMAL;
+	    hdl(fd, hdl_data);
+	    /* backoff check is for delayed connections kicked alive from checkTimeouts */
+	    if (F->flags.open && (!F->read_handler || F->flags.backoff)) {
+		if (F->flags.backoff && commDeferRead(fd) != 1)
+		    F->flags.backoff = 0;
+		commUpdateEvents(fd);
+	    }
+#endif
+	    statCounter.select_fds++;
+	    check_incoming();
+	}
+    }
+}
+#endif
+
+static inline void
 comm_call_handlers(int fd, int read_event, int write_event)
 {
     fde *F = &fd_table[fd];
@@ -109,13 +233,18 @@
 	    void *hdl_data = F->read_data;
 	    /* If the descriptor is meant to be deferred, don't handle */
 	    switch (commDeferRead(fd)) {
-	    case 1:
+#if DELAY_POOLS
+	    case -1:
+		commAddSlow(fd);
+		break;
+#endif
+	    default:
 		if (!(F->flags.backoff)) {
 		    debug(5, 1) ("comm_call_handlers(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc);
 		    commDeferFD(fd);
 		}
 		break;
-	    default:
+	    case 0:
 		debug(5, 8) ("comm_call_handlers(): Calling read handler on fd=%d\n", fd);
 #if SIMPLE_COMM_HANDLER
 		commUpdateReadHandler(fd, NULL, NULL);
@@ -130,6 +259,8 @@
 		    commUpdateEvents(fd);
 #endif
 		statCounter.select_fds++;
+		check_incoming();
+		break;
 	    }
 	}
     }
@@ -163,6 +294,7 @@
 		commUpdateEvents(fd);
 #endif
 	    statCounter.select_fds++;
+	    check_incoming();
 	}
     }
 }
@@ -173,17 +305,32 @@
     int fd;
     fde *F = NULL;
     PF *callback;
+#if DELAY_POOLS
+    delayPoolsUpdate(NULL);
+#endif
     for (fd = 0; fd <= Biggest_FD; fd++) {
 	F = &fd_table[fd];
 	if (!F->flags.open)
 	    continue;
-	if (F->flags.backoff)
-	    commResumeFD(fd);
+	if (F->flags.backoff) {
+	    switch (commDeferRead(fd)) {
+	    case 0:
+		commResumeFD(fd);
+		break;
+#if DELAY_POOLS
+	    case -1:
+		commAddSlow(fd);
+		break;
+#endif
+	    }
+	}
 	if (F->timeout == 0)
 	    continue;
 	if (F->timeout > squid_curtime)
 	    continue;
 	debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
+	if (F->flags.backoff)
+	    commResumeFD(fd);
 	if (F->timeout_handler) {
 	    debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
 	    callback = F->timeout_handler;
@@ -197,6 +344,51 @@
 }
 
 
+int
+comm_select(int msec)
+{
+    static time_t last_timeout = 0;
+    int rc;
+    double start = current_dtime;
+    /* One pass of the I/O loop; returns the status of do_comm_select() */
+    debug(5, 3) ("comm_select: timeout %d\n", msec);
+    /* Never wait longer than MAX_POLL_TIME (the debug line above shows the unclamped value) */
+    if (msec > MAX_POLL_TIME)
+	msec = MAX_POLL_TIME;
+
+#if DELAY_POOLS
+    /* Slow fds are already queued: use a zero timeout so comm_call_slowfds() below runs without waiting */
+    if (n_slow_fds)
+	msec = 0;
+#endif
+
+    statCounter.select_loops++;
+
+    /* Check for disk io callbacks */
+    storeDirCallback();
+
+    /* Check timeouts once per second */
+    if (last_timeout != squid_curtime) {
+	last_timeout = squid_curtime;
+	checkTimeouts();
+    }
+    /* Restart the handled-event count that check_incoming() tests */
+    comm_select_handled = 0;
+    /* Wait for events and dispatch their handlers in the selected backend */
+    rc = do_comm_select(msec);
+
+#if DELAY_POOLS
+    comm_call_slowfds();
+#endif
+    getCurrentTime();
+    statCounter.select_time += (current_dtime - start);
+
+    if (rc == COMM_TIMEOUT)
+	debug(5, 8) ("comm_select: time out\n");
+
+    return rc;
+}
+
 /* Called by async-io or diskd to speed up the polling */
 void
 comm_quick_poll_required(void)
Index: squid/src/comm_kqueue.c
diff -u squid/src/comm_kqueue.c:1.10 squid/src/comm_kqueue.c:1.6.2.4
--- squid/src/comm_kqueue.c:1.10	Tue Aug 15 12:52:39 2006
+++ squid/src/comm_kqueue.c	Mon Oct 23 04:10:47 2006
@@ -50,8 +50,8 @@
 static int kqoff;		/* offset into the buffer */
 static unsigned *kqueue_state;	/* keep track of the kqueue state */
 
-void
-comm_select_init()
+static void
+do_select_init()
 {
     kq = kqueue();
     if (kq < 0)
@@ -71,8 +71,8 @@
     debug(5, 1) ("Using kqueue for the IO loop\n");
 }
 
-void
-comm_select_shutdown()
+static void
+do_select_shutdown()
 {
     fd_close(kq);
     close(kq);
@@ -123,37 +123,20 @@
     }
 }
 
-int
-comm_select(int msec)
+static int
+do_comm_select(int msec)
 {
-    static time_t last_timeout = 0;
     int i;
     int num;
-    double start = current_dtime;
     static struct kevent ke[KE_LENGTH];
     struct timespec timeout;
 
-    if (msec > MAX_POLL_TIME)
-	msec = MAX_POLL_TIME;
-
     timeout.tv_sec = msec / 1000;
     timeout.tv_nsec = (msec % 1000) * 1000000;
 
-    debug(5, 3) ("comm_select: timeout %d\n", msec);
-
-    /* Check for disk io callbacks */
-    storeDirCallback();
-
-    /* Check timeouts once per second */
-    if (last_timeout != squid_curtime) {
-	last_timeout = squid_curtime;
-	checkTimeouts();
-    }
     statCounter.syscalls.polls++;
     num = kevent(kq, kqlst, kqoff, ke, KE_LENGTH, &timeout);
-    statCounter.select_loops++;
     kqoff = 0;
-
     if (num < 0) {
 	getCurrentTime();
 	if (ignoreErrno(errno))
@@ -162,11 +145,9 @@
 	debug(5, 1) ("comm_select: kevent failure: %s\n", xstrerror());
 	return COMM_ERROR;
     }
-    if (num == 0) {
-	getCurrentTime();
-	return COMM_TIMEOUT;
-    }
     statHistCount(&statCounter.select_fds_hist, num);
+    if (num == 0)
+	return COMM_TIMEOUT;
 
     for (i = 0; i < num; i++) {
 	int fd = (int) ke[i].ident;
@@ -190,7 +171,5 @@
 	}
     }
 
-    getCurrentTime();
-    statCounter.select_time += (current_dtime - start);
     return COMM_OK;
 }
Index: squid/src/comm_poll.c
diff -u squid/src/comm_poll.c:1.20 squid/src/comm_poll.c:1.14.2.5
--- squid/src/comm_poll.c:1.20	Tue Aug 15 12:52:39 2006
+++ squid/src/comm_poll.c	Mon Oct 23 04:10:47 2006
@@ -33,6 +33,7 @@
  */
 
 #include "squid.h"
+#include "comm_generic.c"
 
 #if HAVE_SYS_POLL_H
 #include <sys/poll.h>
@@ -40,539 +41,19 @@
 #include <poll.h>
 #endif
 
-static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
-
-/* STATIC */
-static int fdIsHttp(int fd);
-static int fdIsIcp(int fd);
-static int fdIsDns(int fd);
-static int commDeferRead(int fd);
-static void checkTimeouts(void);
-static OBJH commIncomingStats;
-static int comm_check_incoming_poll_handlers(int nfds, int *fds);
-static void comm_poll_dns_incoming(void);
-
-/*
- * Automatic tuning for incoming requests:
- *
- * INCOMING sockets are the ICP and HTTP ports.  We need to check these
- * fairly regularly, but how often?  When the load increases, we
- * want to check the incoming sockets more often.  If we have a lot
- * of incoming ICP, then we need to check these sockets more than
- * if we just have HTTP.
- *
- * The variables 'incoming_icp_interval' and 'incoming_http_interval' 
- * determine how many normal I/O events to process before checking
- * incoming sockets again.  Note we store the incoming_interval
- * multipled by a factor of (2^INCOMING_FACTOR) to have some
- * pseudo-floating point precision.
- *
- * The variable 'icp_io_events' and 'http_io_events' counts how many normal
- * I/O events have been processed since the last check on the incoming
- * sockets.  When io_events > incoming_interval, its time to check incoming
- * sockets.
- *
- * Every time we check incoming sockets, we count how many new messages
- * or connections were processed.  This is used to adjust the
- * incoming_interval for the next iteration.  The new incoming_interval
- * is calculated as the current incoming_interval plus what we would
- * like to see as an average number of events minus the number of
- * events just processed.
- *
- *  incoming_interval = incoming_interval + target_average - number_of_events_processed
- *
- * There are separate incoming_interval counters for both HTTP and ICP events
- * 
- * You can see the current values of the incoming_interval's, as well as
- * a histogram of 'incoming_events' by asking the cache manager
- * for 'comm_incoming', e.g.:
- *
- *      % ./client mgr:comm_incoming
- *
- * Caveats:
- *
- *      - We have MAX_INCOMING_INTEGER as a magic upper limit on
- *        incoming_interval for both types of sockets.  At the
- *        largest value the cache will effectively be idling.
- *
- *      - The higher the INCOMING_FACTOR, the slower the algorithm will
- *        respond to load spikes/increases/decreases in demand. A value
- *        between 3 and 8 is recommended.
- */
-
-#define MAX_INCOMING_INTEGER 256
-#define INCOMING_FACTOR 5
-#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
-static int icp_io_events = 0;
-static int dns_io_events = 0;
-static int http_io_events = 0;
-static int incoming_icp_interval = 16 << INCOMING_FACTOR;
-static int incoming_dns_interval = 16 << INCOMING_FACTOR;
-static int incoming_http_interval = 16 << INCOMING_FACTOR;
-#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval>> INCOMING_FACTOR))
-#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR))
-#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR))
-
-static int
-fdIsIcp(int fd)
-{
-    if (fd == theInIcpConnection)
-	return 1;
-    if (fd == theOutIcpConnection)
-	return 1;
-    return 0;
-}
-
-static int
-fdIsDns(int fd)
-{
-    if (fd == DnsSocket)
-	return 1;
-    return 0;
-}
-
-static int
-fdIsHttp(int fd)
-{
-    int j;
-    for (j = 0; j < NHttpSockets; j++) {
-	if (fd == HttpSockets[j])
-	    return 1;
-    }
-    return 0;
-}
-
-#if DELAY_POOLS
-static int slowfdcnt = 0;
-static int slowfdarr[SQUID_MAXFD];
+static struct pollfd *pfds;
+static int *pfd_map;
+static int nfds = 0;
 
 static void
-commAddSlowFd(int fd)
-{
-    assert(slowfdcnt < SQUID_MAXFD);
-    slowfdarr[slowfdcnt++] = fd;
-}
-
-static int
-commGetSlowFd(void)
-{
-    int whichfd, retfd;
-
-    if (!slowfdcnt)
-	return -1;
-    whichfd = squid_random() % slowfdcnt;
-    retfd = slowfdarr[whichfd];
-    slowfdarr[whichfd] = slowfdarr[--slowfdcnt];
-    return retfd;
-}
-#endif
-
-static int
-comm_check_incoming_poll_handlers(int nfds, int *fds)
+do_select_init()
 {
     int i;
-    int fd;
-    PF *hdl = NULL;
-    int npfds;
-    struct pollfd pfds[3 + MAXHTTPPORTS];
-    incoming_sockets_accepted = 0;
-    for (i = npfds = 0; i < nfds; i++) {
-	int events;
-	fd = fds[i];
-	events = 0;
-	if (fd_table[fd].read_handler)
-	    events |= POLLRDNORM;
-	if (fd_table[fd].write_handler)
-	    events |= POLLWRNORM;
-	if (events) {
-	    pfds[npfds].fd = fd;
-	    pfds[npfds].events = events;
-	    pfds[npfds].revents = 0;
-	    npfds++;
-	}
-    }
-    if (!nfds)
-	return -1;
-    getCurrentTime();
-    statCounter.syscalls.polls++;
-    if (poll(pfds, npfds, 0) < 1)
-	return incoming_sockets_accepted;
-    for (i = 0; i < npfds; i++) {
-	int revents;
-	if (((revents = pfds[i].revents) == 0) || ((fd = pfds[i].fd) == -1))
-	    continue;
-	if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
-	    if ((hdl = fd_table[fd].read_handler)) {
-		fd_table[fd].read_handler = NULL;
-		hdl(fd, fd_table[fd].read_data);
-	    } else if (pfds[i].events & POLLRDNORM)
-		debug(5, 1) ("comm_poll_incoming: FD %d NULL read handler\n",
-		    fd);
-	}
-	if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
-	    if ((hdl = fd_table[fd].write_handler)) {
-		fd_table[fd].write_handler = NULL;
-		hdl(fd, fd_table[fd].write_data);
-	    } else if (pfds[i].events & POLLWRNORM)
-		debug(5, 1) ("comm_poll_incoming: FD %d NULL write_handler\n",
-		    fd);
-	}
-    }
-    return incoming_sockets_accepted;
-}
-
-static void
-comm_poll_icp_incoming(void)
-{
-    int nfds = 0;
-    int fds[2];
-    int nevents;
-    icp_io_events = 0;
-    if (theInIcpConnection >= 0)
-	fds[nfds++] = theInIcpConnection;
-    if (theInIcpConnection != theOutIcpConnection)
-	if (theOutIcpConnection >= 0)
-	    fds[nfds++] = theOutIcpConnection;
-    if (nfds == 0)
-	return;
-    nevents = comm_check_incoming_poll_handlers(nfds, fds);
-    incoming_icp_interval += Config.comm_incoming.icp_average - nevents;
-    if (incoming_icp_interval < Config.comm_incoming.icp_min_poll)
-	incoming_icp_interval = Config.comm_incoming.icp_min_poll;
-    if (incoming_icp_interval > MAX_INCOMING_INTERVAL)
-	incoming_icp_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_ICP_MAX)
-	nevents = INCOMING_ICP_MAX;
-    statHistCount(&statCounter.comm_icp_incoming, nevents);
-}
-
-static void
-comm_poll_http_incoming(void)
-{
-    int nfds = 0;
-    int fds[MAXHTTPPORTS];
-    int j;
-    int nevents;
-    http_io_events = 0;
-    for (j = 0; j < NHttpSockets; j++) {
-	if (HttpSockets[j] < 0)
-	    continue;
-	if (commDeferRead(HttpSockets[j]))
-	    continue;
-	fds[nfds++] = HttpSockets[j];
-    }
-    nevents = comm_check_incoming_poll_handlers(nfds, fds);
-    incoming_http_interval = incoming_http_interval
-	+ Config.comm_incoming.http_average - nevents;
-    if (incoming_http_interval < Config.comm_incoming.http_min_poll)
-	incoming_http_interval = Config.comm_incoming.http_min_poll;
-    if (incoming_http_interval > MAX_INCOMING_INTERVAL)
-	incoming_http_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_HTTP_MAX)
-	nevents = INCOMING_HTTP_MAX;
-    statHistCount(&statCounter.comm_http_incoming, nevents);
-}
-
-/* poll all sockets; call handlers for those that are ready. */
-int
-comm_select(int msec)
-{
-    struct pollfd pfds[SQUID_MAXFD];
-#if DELAY_POOLS
-    char slowfds[SQUID_MAXFD];
-#endif
-    int fd;
-    unsigned int i;
-    unsigned int maxfd;
-    unsigned int nfds;
-    unsigned int npending;
-    int num;
-    int callicp = 0, callhttp = 0;
-    int calldns = 0;
-    static time_t last_timeout = 0;
-    double timeout = current_dtime + (msec / 1000.0);
-    do {
-	double start;
-	getCurrentTime();
-	start = current_dtime;
-	/* Handle any fs callbacks that need doing */
-	storeDirCallback();
-#if DELAY_POOLS
-	memset(slowfds, 0, Biggest_FD);
-#endif
-	if (commCheckICPIncoming)
-	    comm_poll_icp_incoming();
-	if (commCheckDNSIncoming)
-	    comm_poll_dns_incoming();
-	if (commCheckHTTPIncoming)
-	    comm_poll_http_incoming();
-	callicp = calldns = callhttp = 0;
-	nfds = 0;
-	npending = 0;
-	maxfd = Biggest_FD + 1;
-	for (i = 0; i < maxfd; i++) {
-	    int events;
-	    events = 0;
-	    /* Check each open socket for a handler. */
-	    if (fd_table[i].read_handler) {
-		int dopoll = 1;
-		switch (commDeferRead(i)) {
-		case 0:
-		    break;
-		case 1:
-		    dopoll = 0;
-		    break;
-#if DELAY_POOLS
-		case -1:
-		    slowfds[i] = 1;
-		    break;
-#endif
-		default:
-		    fatalf("bad return value from commDeferRead(FD %d)\n", i);
-		    /* NOTREACHED */
-		}
-		if (dopoll) {
-		    switch (fd_table[i].read_pending) {
-		    case COMM_PENDING_NORMAL:
-			events |= POLLRDNORM;
-			break;
-		    case COMM_PENDING_WANTS_WRITE:
-			events |= POLLWRNORM;
-			break;
-		    case COMM_PENDING_WANTS_READ:
-			events |= POLLRDNORM;
-			break;
-		    case COMM_PENDING_NOW:
-			events |= POLLRDNORM;
-			npending++;
-			break;
-		    }
-		}
-	    }
-	    if (fd_table[i].write_handler) {
-		switch (fd_table[i].write_pending) {
-		case COMM_PENDING_NORMAL:
-		    events |= POLLWRNORM;
-		    break;
-		case COMM_PENDING_WANTS_WRITE:
-		    events |= POLLWRNORM;
-		    break;
-		case COMM_PENDING_WANTS_READ:
-		    events |= POLLRDNORM;
-		    break;
-		case COMM_PENDING_NOW:
-		    events |= POLLWRNORM;
-		    npending++;
-		    break;
-		}
-	    }
-	    if (events) {
-		pfds[nfds].fd = i;
-		pfds[nfds].events = events;
-		pfds[nfds].revents = 0;
-		nfds++;
-	    }
-	}
-	if (nfds == 0) {
-	    assert(shutting_down);
-	    return COMM_SHUTDOWN;
-	}
-	if (npending)
-	    msec = 0;
-	if (msec > MAX_POLL_TIME)
-	    msec = MAX_POLL_TIME;
-	statCounter.syscalls.polls++;
-	num = poll(pfds, nfds, msec);
-	statCounter.select_loops++;
-	if (num < 0 && !ignoreErrno(errno)) {
-	    debug(5, 0) ("comm_select: poll failure: %s\n", xstrerror());
-	    assert(errno != EINVAL);
-	    return COMM_ERROR;
-	    /* NOTREACHED */
-	}
-	debug(5, num ? 5 : 8) ("comm_select: %d+%u FDs ready\n", num, npending);
-	statHistCount(&statCounter.select_fds_hist, num);
-	/* Check timeout handlers ONCE each second. */
-	if (squid_curtime > last_timeout) {
-	    last_timeout = squid_curtime;
-	    checkTimeouts();
-	}
-	if (num <= 0 && npending == 0)
-	    continue;
-	/* scan each socket but the accept socket. Poll this 
-	 * more frequently to minimize losses due to the 5 connect 
-	 * limit in SunOS */
-	for (i = 0; i < nfds; i++) {
-	    fde *F;
-	    int revents = pfds[i].revents;
-	    fd = pfds[i].fd;
-	    if (fd == -1)
-		continue;
-	    switch (fd_table[fd].read_pending) {
-	    case COMM_PENDING_NORMAL:
-	    case COMM_PENDING_WANTS_READ:
-		break;
-	    case COMM_PENDING_WANTS_WRITE:
-		if (pfds[i].revents & (POLLOUT | POLLWRNORM))
-		    revents |= POLLIN;
-		break;
-	    case COMM_PENDING_NOW:
-		revents |= POLLIN;
-		break;
-	    }
-	    switch (fd_table[fd].write_pending) {
-	    case COMM_PENDING_NORMAL:
-	    case COMM_PENDING_WANTS_WRITE:
-		break;
-	    case COMM_PENDING_WANTS_READ:
-		if (pfds[i].revents & (POLLIN | POLLRDNORM))
-		    revents |= POLLOUT;
-		break;
-	    case COMM_PENDING_NOW:
-		revents |= POLLOUT;
-		break;
-	    }
-	    if (revents == 0)
-		continue;
-	    if (fdIsIcp(fd)) {
-		callicp = 1;
-		continue;
-	    }
-	    if (fdIsDns(fd)) {
-		calldns = 1;
-		continue;
-	    }
-	    if (fdIsHttp(fd)) {
-		callhttp = 1;
-		continue;
-	    }
-	    F = &fd_table[fd];
-	    if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
-		PF *hdl = F->read_handler;
-		debug(5, 6) ("comm_select: FD %d ready for reading\n", fd);
-		if (hdl == NULL)
-		    (void) 0;	/* Nothing to do */
-#if DELAY_POOLS
-		else if (slowfds[i])
-		    commAddSlowFd(fd);
-#endif
-		else {
-		    F->read_handler = NULL;
-		    F->read_pending = COMM_PENDING_NORMAL;
-		    hdl(fd, F->read_data);
-		    statCounter.select_fds++;
-		    if (commCheckICPIncoming)
-			comm_poll_icp_incoming();
-		    if (commCheckDNSIncoming)
-			comm_poll_dns_incoming();
-		    if (commCheckHTTPIncoming)
-			comm_poll_http_incoming();
-		}
-	    }
-	    if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
-		PF *hdl = F->write_handler;
-		debug(5, 5) ("comm_select: FD %d ready for writing\n", fd);
-		if (hdl != NULL) {
-		    F->write_handler = NULL;
-		    F->write_pending = COMM_PENDING_NORMAL;
-		    hdl(fd, F->write_data);
-		    statCounter.select_fds++;
-		    if (commCheckICPIncoming)
-			comm_poll_icp_incoming();
-		    if (commCheckDNSIncoming)
-			comm_poll_dns_incoming();
-		    if (commCheckHTTPIncoming)
-			comm_poll_http_incoming();
-		}
-	    }
-	    if (revents & POLLNVAL) {
-		close_handler *ch;
-		debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
-		debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]);
-		debug(5, 0) ("--> %s\n", F->desc);
-		debug(5, 0) ("tmout:%p read:%p write:%p\n",
-		    F->timeout_handler,
-		    F->read_handler,
-		    F->write_handler);
-		for (ch = F->close_handler; ch; ch = ch->next)
-		    debug(5, 0) (" close handler: %p\n", ch->handler);
-		if (F->close_handler) {
-		    commCallCloseHandlers(fd);
-		} else if (F->timeout_handler) {
-		    debug(5, 0) ("comm_select: Calling Timeout Handler\n");
-		    F->timeout_handler(fd, F->timeout_data);
-		}
-		F->close_handler = NULL;
-		F->timeout_handler = NULL;
-		F->read_handler = NULL;
-		F->write_handler = NULL;
-		if (F->flags.open)
-		    fd_close(fd);
-	    }
-	}
-	if (callicp)
-	    comm_poll_icp_incoming();
-	if (calldns)
-	    comm_poll_dns_incoming();
-	if (callhttp)
-	    comm_poll_http_incoming();
-#if DELAY_POOLS
-	while ((fd = commGetSlowFd()) != -1) {
-	    fde *F = &fd_table[fd];
-	    PF *hdl = F->read_handler;
-	    debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd);
-	    if (hdl != NULL) {
-		F->read_handler = NULL;
-		F->read_pending = COMM_PENDING_NORMAL;
-		hdl(fd, F->read_data);
-		statCounter.select_fds++;
-		if (commCheckICPIncoming)
-		    comm_poll_icp_incoming();
-		if (commCheckDNSIncoming)
-		    comm_poll_dns_incoming();
-		if (commCheckHTTPIncoming)
-		    comm_poll_http_incoming();
-	    }
-	}
-#endif
-	getCurrentTime();
-	statCounter.select_time += (current_dtime - start);
-	return COMM_OK;
+    pfds = xcalloc(sizeof(*pfds), Squid_MaxFD);
+    pfd_map = xcalloc(sizeof(*pfd_map), Squid_MaxFD);
+    for (i = 0; i < Squid_MaxFD; i++) {
+	pfd_map[i] = -1;
     }
-    while (timeout > current_dtime);
-    debug(5, 8) ("comm_select: time out: %ld.\n", (long int) squid_curtime);
-    return COMM_TIMEOUT;
-}
-
-static void
-comm_poll_dns_incoming(void)
-{
-    int nfds = 0;
-    int fds[2];
-    int nevents;
-    dns_io_events = 0;
-    if (DnsSocket < 0)
-	return;
-    fds[nfds++] = DnsSocket;
-    nevents = comm_check_incoming_poll_handlers(nfds, fds);
-    if (nevents < 0)
-	return;
-    incoming_dns_interval += Config.comm_incoming.dns_average - nevents;
-    if (incoming_dns_interval < Config.comm_incoming.dns_min_poll)
-	incoming_dns_interval = Config.comm_incoming.dns_min_poll;
-    if (incoming_dns_interval > MAX_INCOMING_INTERVAL)
-	incoming_dns_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_DNS_MAX)
-	nevents = INCOMING_DNS_MAX;
-    statHistCount(&statCounter.comm_dns_incoming, nevents);
-}
-
-void
-comm_select_init(void)
-{
-    cachemgrRegister("comm_incoming",
-	"comm_incoming() stats",
-	commIncomingStats, 0, 1);
 }
 
 void
@@ -581,9 +62,10 @@
     debug(5, 1) ("Using poll for the IO loop\n");
 }
 
-void
-comm_select_shutdown(void)
+static void do_select_shutdown()
 {
+    safe_free(pfds);
+    safe_free(pfd_map);	/* do_select_init() allocates both arrays; this one was never freed */
 }
 
 void
@@ -592,86 +74,77 @@
     storeAppendPrintf(sentry, "\tIO loop method:                     poll\n");
 }
 
-static void
-commIncomingStats(StoreEntry * sentry)
-{
-    StatCounters *f = &statCounter;
-    storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n",
-	incoming_icp_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n",
-	incoming_dns_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n",
-	incoming_http_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "\n");
-    storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n");
-    storeAppendPrintf(sentry, "ICP Messages handled per comm_poll_icp_incoming() call:\n");
-    statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper);
-    storeAppendPrintf(sentry, "DNS Messages handled per comm_poll_dns_incoming() call:\n");
-    statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper);
-    storeAppendPrintf(sentry, "HTTP Messages handled per comm_poll_http_incoming() call:\n");
-    statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper);
-}
-
 void
 commSetEvents(int fd, int need_read, int need_write)
 {
-    /* XXX Here we could optimize the poll arrays quite considerably */
+    int pfdn = pfd_map[fd];
+    struct pollfd *pfd = pfdn >= 0 ? &pfds[pfdn] : NULL;
+    short events = (need_read ? POLLRDNORM : 0) | (need_write ? POLLWRNORM : 0);
+
+    if (!pfd && !events)
+	return;
+
+    if (!pfd) {
+	pfdn = nfds++;
+	pfd_map[fd] = pfdn;
+	pfd = &pfds[pfdn];
+	pfd->fd = fd;
+	pfd->events = events;
+    } else if (events) {
+	pfd->events = events;
+    } else {
+	pfd_map[fd] = -1;
+	nfds--;
+	*pfd = pfds[nfds];
+	pfds[nfds].events = 0;
+	pfds[nfds].revents = 0;
+	pfds[nfds].fd = -1;
+	if (pfd->fd >= 0)
+	    pfd_map[pfd->fd] = pfdn;
+    }
 }
 
 static int
-commDeferRead(int fd)
+do_comm_select(int msec)
 {
-    fde *F = &fd_table[fd];
-    if (F->defer_check == NULL)
-	return 0;
-    return F->defer_check(fd, F->defer_data);
-}
+    int num;
+    int i;
 
-static void
-checkTimeouts(void)
-{
-    int fd;
-    fde *F = NULL;
-    PF *callback;
-    for (fd = 0; fd <= Biggest_FD; fd++) {
-	F = &fd_table[fd];
-	if (!F->flags.open)
-	    continue;
-	if (F->timeout == 0)
-	    continue;
-	if (F->timeout > squid_curtime)
-	    continue;
-	debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
-	if (F->timeout_handler) {
-	    debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
-	    callback = F->timeout_handler;
-	    F->timeout_handler = NULL;
-	    callback(fd, F->timeout_data);
-	} else {
-	    debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
-	    comm_close(fd);
-	}
+    if (nfds == 0) {
+	assert(shutting_down);
+	return COMM_SHUTDOWN;
     }
-}
 
+    statCounter.syscalls.polls++;	/* this backend calls poll(), so count it as a poll */
+    num = poll(pfds, nfds, msec);
+    if (num < 0) {
+	getCurrentTime();
+	if (ignoreErrno(errno))
+	    return COMM_OK;
 
-/* Called by async-io or diskd to speed up the polling */
-void
-comm_quick_poll_required(void)
-{
-    MAX_POLL_TIME = 10;
-}
+	debug(5, 1) ("comm_select: poll failure: %s\n", xstrerror());
+	return COMM_ERROR;
+    }
+    statHistCount(&statCounter.select_fds_hist, num);
 
-/* Defer reads from this fd */
-void
-commDeferFD(int fd)
-{
-    /* Not implemented */
-}
+    if (num == 0)
+	return COMM_TIMEOUT;
 
-/* Resume reading from the given fd */
-void
-commResumeFD(int fd)
-{
-    /* Not implemented */
+    for (i = nfds - 1; num > 0 && i >= 0 ; i--) {
+	struct pollfd *pfd = &pfds[i];
+	short read_event, write_event;
+
+	if (!pfd->revents)
+	    continue;
+
+	read_event = pfd->revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR);
+	write_event = pfd->revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR);
+
+	pfd->revents = 0;
+
+	comm_call_handlers(pfd->fd, read_event, write_event);
+	num--;
+    }
+
+    return COMM_OK;
 }
Index: squid/src/comm_select.c
diff -u squid/src/comm_select.c:1.31 squid/src/comm_select.c:1.29.2.5
--- squid/src/comm_select.c:1.31	Tue Aug 15 12:52:42 2006
+++ squid/src/comm_select.c	Mon Oct 23 04:10:48 2006
@@ -33,13 +33,12 @@
  */
 
 #include "squid.h"
+#include "comm_generic.c"
 
 #if HAVE_SYS_SELECT_H
 #include <sys/select.h>
 #endif
 
-static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
-
 #ifndef        howmany
 #define howmany(x, y)   (((x)+((y)-1))/(y))
 #endif
@@ -49,522 +48,20 @@
 #define FD_MASK_BYTES sizeof(fd_mask)
 #define FD_MASK_BITS (FD_MASK_BYTES*NBBY)
 
-/* STATIC */
-static int examine_select(fd_set *, fd_set *);
-static int fdIsHttp(int fd);
-static int fdIsIcp(int fd);
-static int fdIsDns(int fd);
-static int commDeferRead(int fd);
-static void checkTimeouts(void);
-static OBJH commIncomingStats;
-static int comm_check_incoming_select_handlers(int nfds, int *fds);
-static void comm_select_dns_incoming(void);
-
-static struct timeval zero_tv;
-static fd_set global_readfds;
-static fd_set global_writefds;
+static fd_set *global_readfds;
+static fd_set *global_writefds;
+static fd_set *current_readfds;
+static fd_set *current_writefds;
 static int nreadfds;
 static int nwritefds;
 
-/*
- * Automatic tuning for incoming requests:
- *
- * INCOMING sockets are the ICP and HTTP ports.  We need to check these
- * fairly regularly, but how often?  When the load increases, we
- * want to check the incoming sockets more often.  If we have a lot
- * of incoming ICP, then we need to check these sockets more than
- * if we just have HTTP.
- *
- * The variables 'incoming_icp_interval' and 'incoming_http_interval' 
- * determine how many normal I/O events to process before checking
- * incoming sockets again.  Note we store the incoming_interval
- * multipled by a factor of (2^INCOMING_FACTOR) to have some
- * pseudo-floating point precision.
- *
- * The variable 'icp_io_events' and 'http_io_events' counts how many normal
- * I/O events have been processed since the last check on the incoming
- * sockets.  When io_events > incoming_interval, its time to check incoming
- * sockets.
- *
- * Every time we check incoming sockets, we count how many new messages
- * or connections were processed.  This is used to adjust the
- * incoming_interval for the next iteration.  The new incoming_interval
- * is calculated as the current incoming_interval plus what we would
- * like to see as an average number of events minus the number of
- * events just processed.
- *
- *  incoming_interval = incoming_interval + target_average - number_of_events_processed
- *
- * There are separate incoming_interval counters for both HTTP and ICP events
- * 
- * You can see the current values of the incoming_interval's, as well as
- * a histogram of 'incoming_events' by asking the cache manager
- * for 'comm_incoming', e.g.:
- *
- *      % ./client mgr:comm_incoming
- *
- * Caveats:
- *
- *      - We have MAX_INCOMING_INTEGER as a magic upper limit on
- *        incoming_interval for both types of sockets.  At the
- *        largest value the cache will effectively be idling.
- *
- *      - The higher the INCOMING_FACTOR, the slower the algorithm will
- *        respond to load spikes/increases/decreases in demand. A value
- *        between 3 and 8 is recommended.
- */
-
-#define MAX_INCOMING_INTEGER 256
-#define INCOMING_FACTOR 5
-#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
-static int icp_io_events = 0;
-static int dns_io_events = 0;
-static int http_io_events = 0;
-static int incoming_icp_interval = 16 << INCOMING_FACTOR;
-static int incoming_dns_interval = 16 << INCOMING_FACTOR;
-static int incoming_http_interval = 16 << INCOMING_FACTOR;
-#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval>> INCOMING_FACTOR))
-#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR))
-#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR))
-
-static int
-fdIsIcp(int fd)
-{
-    if (fd == theInIcpConnection)
-	return 1;
-    if (fd == theOutIcpConnection)
-	return 1;
-    return 0;
-}
-
-static int
-fdIsDns(int fd)
-{
-    if (fd == DnsSocket)
-	return 1;
-    return 0;
-}
-
-static int
-fdIsHttp(int fd)
-{
-    int j;
-    for (j = 0; j < NHttpSockets; j++) {
-	if (fd == HttpSockets[j])
-	    return 1;
-    }
-    return 0;
-}
-
-#if DELAY_POOLS
-static int slowfdcnt = 0;
-static int slowfdarr[SQUID_MAXFD];
-
-static void
-commAddSlowFd(int fd)
-{
-    assert(slowfdcnt < SQUID_MAXFD);
-    slowfdarr[slowfdcnt++] = fd;
-}
-
-static int
-commGetSlowFd(void)
-{
-    int whichfd, retfd;
-
-    if (!slowfdcnt)
-	return -1;
-    whichfd = squid_random() % slowfdcnt;
-    retfd = slowfdarr[whichfd];
-    slowfdarr[whichfd] = slowfdarr[--slowfdcnt];
-    return retfd;
-}
-#endif
-
-static int
-comm_check_incoming_select_handlers(int nfds, int *fds)
-{
-    int i;
-    int fd;
-    int maxfd = 0;
-    fd_set read_mask;
-    fd_set write_mask;
-    FD_ZERO(&read_mask);
-    FD_ZERO(&write_mask);
-    incoming_sockets_accepted = 0;
-    for (i = 0; i < nfds; i++) {
-	fd = fds[i];
-	if (fd_table[fd].read_handler) {
-	    FD_SET(fd, &read_mask);
-	    if (fd > maxfd)
-		maxfd = fd;
-	}
-	if (fd_table[fd].write_handler) {
-	    FD_SET(fd, &write_mask);
-	    if (fd > maxfd)
-		maxfd = fd;
-	}
-    }
-    if (maxfd++ == 0)
-	return -1;
-    getCurrentTime();
-    statCounter.syscalls.selects++;
-    if (select(maxfd, &read_mask, &write_mask, NULL, &zero_tv) < 1)
-	return incoming_sockets_accepted;
-    for (i = 0; i < nfds; i++) {
-	fd = fds[i];
-	if (FD_ISSET(fd, &read_mask)) {
-	    PF *hdl = fd_table[fd].read_handler;
-	    void *hdl_data = fd_table[fd].read_data;
-	    commUpdateReadHandler(fd, NULL, NULL);
-	    if (hdl)
-		hdl(fd, hdl_data);
-	    else
-		debug(5, 1) ("comm_select_incoming: FD %d NULL read handler\n", fd);
-	}
-	if (FD_ISSET(fd, &write_mask)) {
-	    PF *hdl = fd_table[fd].write_handler;
-	    void *hdl_data = fd_table[fd].write_data;
-	    commUpdateWriteHandler(fd, NULL, NULL);
-	    if (hdl)
-		hdl(fd, hdl_data);
-	    else
-		debug(5, 1) ("comm_select_incoming: FD %d NULL write handler\n", fd);
-	}
-    }
-    return incoming_sockets_accepted;
-}
-
 static void
-comm_select_icp_incoming(void)
+do_select_init()
 {
-    int nfds = 0;
-    int fds[2];
-    int nevents;
-    icp_io_events = 0;
-    if (theInIcpConnection >= 0)
-	fds[nfds++] = theInIcpConnection;
-    if (theInIcpConnection != theOutIcpConnection)
-	if (theOutIcpConnection >= 0)
-	    fds[nfds++] = theOutIcpConnection;
-    if (nfds == 0)
-	return;
-    nevents = comm_check_incoming_select_handlers(nfds, fds);
-    incoming_icp_interval += Config.comm_incoming.icp_average - nevents;
-    if (incoming_icp_interval < 0)
-	incoming_icp_interval = 0;
-    if (incoming_icp_interval > MAX_INCOMING_INTERVAL)
-	incoming_icp_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_ICP_MAX)
-	nevents = INCOMING_ICP_MAX;
-    statHistCount(&statCounter.comm_icp_incoming, nevents);
-}
-
-static void
-comm_select_http_incoming(void)
-{
-    int nfds = 0;
-    int fds[MAXHTTPPORTS];
-    int j;
-    int nevents;
-    http_io_events = 0;
-    for (j = 0; j < NHttpSockets; j++) {
-	if (HttpSockets[j] < 0)
-	    continue;
-	if (commDeferRead(HttpSockets[j]))
-	    continue;
-	fds[nfds++] = HttpSockets[j];
-    }
-    nevents = comm_check_incoming_select_handlers(nfds, fds);
-    incoming_http_interval += Config.comm_incoming.http_average - nevents;
-    if (incoming_http_interval < 0)
-	incoming_http_interval = 0;
-    if (incoming_http_interval > MAX_INCOMING_INTERVAL)
-	incoming_http_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_HTTP_MAX)
-	nevents = INCOMING_HTTP_MAX;
-    statHistCount(&statCounter.comm_http_incoming, nevents);
-}
-
-#define DEBUG_FDBITS 0
-/* Select on all sockets; call handlers for those that are ready. */
-int
-comm_select(int msec)
-{
-    fd_set readfds;
-    fd_set writefds;
-#if DELAY_POOLS
-    fd_set slowfds;
-#endif
-    int fd;
-    int maxfd;
-    int num;
-    int callicp = 0, callhttp = 0;
-    int calldns = 0;
-    int maxindex;
-    int k;
-    int j;
-    fd_mask *rfdsp;
-    fd_mask *wfdsp;
-    fd_mask tmask;
-    static time_t last_timeout = 0;
-    struct timeval poll_time;
-    double timeout = current_dtime + (msec / 1000.0);
-    do {
-	double start;
-	getCurrentTime();
-	start = current_dtime;
-#if DELAY_POOLS
-	FD_ZERO(&slowfds);
-#endif
-	/* Handle any fs callbacks that need doing */
-	storeDirCallback();
-	if (commCheckICPIncoming)
-	    comm_select_icp_incoming();
-	if (commCheckDNSIncoming)
-	    comm_select_dns_incoming();
-	if (commCheckHTTPIncoming)
-	    comm_select_http_incoming();
-	callicp = calldns = callhttp = 0;
-	maxfd = Biggest_FD + 1;
-	xmemcpy(&readfds, &global_readfds,
-	    howmany(maxfd, FD_MASK_BITS) * FD_MASK_BYTES);
-	xmemcpy(&writefds, &global_writefds,
-	    howmany(maxfd, FD_MASK_BITS) * FD_MASK_BYTES);
-	/* remove stalled FDs, and deal with pending descriptors */
-	maxindex = howmany(maxfd, FD_MASK_BITS);
-	/* Note: To simplify logics we are cheating a little on pending fds
-	 * by assuming they will be ready for either read or write to trigger
-	 * the callback.
-	 */
-	rfdsp = (fd_mask *) & readfds;
-	for (j = 0; j < maxindex; j++) {
-	    if ((tmask = rfdsp[j]) == 0)
-		continue;	/* no bits here */
-	    for (k = 0; k < FD_MASK_BITS; k++) {
-		if (!EBIT_TEST(tmask, k))
-		    continue;
-		/* Found a set bit */
-		fd = (j * FD_MASK_BITS) + k;
-		if (fd_table[fd].read_handler) {
-		    switch (commDeferRead(fd)) {
-		    case 0:
-			break;
-		    case 1:
-			FD_CLR(fd, &readfds);
-			if (!fd_table[fd].write_handler)
-			    FD_CLR(fd, &writefds);
-			break;
-#if DELAY_POOLS
-		    case -1:
-			FD_SET(fd, &slowfds);
-			break;
-#endif
-		    default:
-			fatalf("bad return value from commDeferRead(FD %d)\n", fd);
-		    }
-		}
-	    }
-	}
-	if (nreadfds + nwritefds == 0) {
-	    assert(shutting_down);
-	    return COMM_SHUTDOWN;
-	}
-	if (msec > MAX_POLL_TIME)
-	    msec = MAX_POLL_TIME;
-	for (;;) {
-	    poll_time.tv_sec = msec / 1000;
-	    poll_time.tv_usec = (msec % 1000) * 1000;
-	    statCounter.syscalls.selects++;
-	    num = select(maxfd, &readfds, &writefds, NULL, &poll_time);
-	    statCounter.select_loops++;
-	    if (num >= 0)
-		break;
-	    if (ignoreErrno(errno))
-		break;
-	    debug(5, 0) ("comm_select: select failure: %s\n",
-		xstrerror());
-	    examine_select(&readfds, &writefds);
-	    return COMM_ERROR;
-	    /* NOTREACHED */
-	}
-	if (num < 0)
-	    continue;
-	debug(5, num ? 5 : 8) ("comm_select: %d FDs ready at %d\n",
-	    num, (int) squid_curtime);
-	statHistCount(&statCounter.select_fds_hist, num);
-	/* Check lifetime and timeout handlers ONCE each second.
-	 * Replaces brain-dead check every time through the loop! */
-	if (squid_curtime > last_timeout) {
-	    last_timeout = squid_curtime;
-	    checkTimeouts();
-	}
-	if (num == 0)
-	    continue;
-	/* Scan return fd masks for ready descriptors */
-	rfdsp = (fd_mask *) & readfds;
-	wfdsp = (fd_mask *) & writefds;
-	maxindex = howmany(maxfd, FD_MASK_BITS);
-	for (j = 0; j < maxindex; j++) {
-	    if ((tmask = (rfdsp[j] | wfdsp[j])) == 0)
-		continue;	/* no bits here */
-	    for (k = 0; k < FD_MASK_BITS; k++) {
-		fde *F;
-		if (tmask == 0)
-		    break;	/* no more bits left */
-		if (!EBIT_TEST(tmask, k))
-		    continue;
-		/* Found a set bit */
-		fd = (j * FD_MASK_BITS) + k;
-		EBIT_CLR(tmask, k);	/* this will be done */
-		if (fdIsIcp(fd)) {
-		    callicp = 1;
-		    continue;
-		}
-		if (fdIsDns(fd)) {
-		    calldns = 1;
-		    continue;
-		}
-		if (fdIsHttp(fd)) {
-		    callhttp = 1;
-		    continue;
-		}
-		F = &fd_table[fd];
-		debug(5, 6) ("comm_select: FD %d ready for %s%s\n", fd,
-		    FD_ISSET(fd, &readfds) ? "read" : "",
-		    FD_ISSET(fd, &writefds) ? "write" : "");
-		if (F->read_handler) {
-		    int do_read = 0;
-		    switch (F->read_pending) {
-		    case COMM_PENDING_NORMAL:
-		    case COMM_PENDING_WANTS_READ:
-			do_read = FD_ISSET(fd, &readfds);
-			break;
-		    case COMM_PENDING_WANTS_WRITE:
-			do_read = FD_ISSET(fd, &writefds);
-			break;
-		    case COMM_PENDING_NOW:
-			do_read = 1;
-			break;
-		    }
-#if DELAY_POOLS
-		    if (do_read && FD_ISSET(fd, &slowfds))
-			commAddSlowFd(fd);
-		    else
-#endif
-		    if (do_read) {
-			PF *hdl = F->read_handler;
-			void *hdl_data = F->read_data;
-			debug(5, 6) ("comm_select: FD %d calling read_handler %p(%p)\n", fd, hdl, hdl_data);
-			commUpdateReadHandler(fd, NULL, NULL);
-			hdl(fd, hdl_data);
-			statCounter.select_fds++;
-			if (commCheckICPIncoming)
-			    comm_select_icp_incoming();
-			if (commCheckDNSIncoming)
-			    comm_select_dns_incoming();
-			if (commCheckHTTPIncoming)
-			    comm_select_http_incoming();
-		    }
-		}
-		if (F->write_handler) {
-		    int do_write = 0;
-		    switch (F->write_pending) {
-		    case COMM_PENDING_WANTS_READ:
-			do_write = FD_ISSET(fd, &readfds);
-			break;
-		    case COMM_PENDING_NORMAL:
-		    case COMM_PENDING_WANTS_WRITE:
-			do_write = FD_ISSET(fd, &writefds);
-			break;
-		    case COMM_PENDING_NOW:
-			do_write = 1;
-			break;
-		    }
-		    if (do_write) {
-			PF *hdl = F->write_handler;
-			void *hdl_data = F->write_data;
-			debug(5, 6) ("comm_select: FD %d calling write_handler %p(%p)\n", fd, hdl, hdl_data);
-			commUpdateWriteHandler(fd, NULL, NULL);
-			hdl(fd, hdl_data);
-			statCounter.select_fds++;
-			if (commCheckICPIncoming)
-			    comm_select_icp_incoming();
-			if (commCheckDNSIncoming)
-			    comm_select_dns_incoming();
-			if (commCheckHTTPIncoming)
-			    comm_select_http_incoming();
-		    }
-		}
-	    }
-	}
-	if (callicp)
-	    comm_select_icp_incoming();
-	if (calldns)
-	    comm_select_dns_incoming();
-	if (callhttp)
-	    comm_select_http_incoming();
-#if DELAY_POOLS
-	while ((fd = commGetSlowFd()) != -1) {
-	    fde *F = &fd_table[fd];
-	    PF *hdl = F->read_handler;
-	    void *hdl_data = F->read_data;
-	    debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd);
-	    if (hdl) {
-		commUpdateReadHandler(fd, NULL, NULL);
-		hdl(fd, hdl_data);
-		statCounter.select_fds++;
-		if (commCheckICPIncoming)
-		    comm_select_icp_incoming();
-		if (commCheckDNSIncoming)
-		    comm_select_dns_incoming();
-		if (commCheckHTTPIncoming)
-		    comm_select_http_incoming();
-	    }
-	}
-#endif
-	getCurrentTime();
-	statCounter.select_time += (current_dtime - start);
-	return COMM_OK;
-    }
-    while (timeout > current_dtime);
-    debug(5, 8) ("comm_select: time out: %d\n", (int) squid_curtime);
-    return COMM_TIMEOUT;
-}
-
-static void
-comm_select_dns_incoming(void)
-{
-    int nfds = 0;
-    int fds[2];
-    int nevents;
-    dns_io_events = 0;
-    if (DnsSocket < 0)
-	return;
-    fds[nfds++] = DnsSocket;
-    nevents = comm_check_incoming_select_handlers(nfds, fds);
-    if (nevents < 0)
-	return;
-    incoming_dns_interval += Config.comm_incoming.dns_average - nevents;
-    if (incoming_dns_interval < Config.comm_incoming.dns_min_poll)
-	incoming_dns_interval = Config.comm_incoming.dns_min_poll;
-    if (incoming_dns_interval > MAX_INCOMING_INTERVAL)
-	incoming_dns_interval = MAX_INCOMING_INTERVAL;
-    if (nevents > INCOMING_DNS_MAX)
-	nevents = INCOMING_DNS_MAX;
-    statHistCount(&statCounter.comm_dns_incoming, nevents);
-}
-
-void
-comm_select_init(void)
-{
-    zero_tv.tv_sec = 0;
-    zero_tv.tv_usec = 0;
-    cachemgrRegister("comm_incoming",
-	"comm_incoming() stats",
-	commIncomingStats, 0, 1);
-    FD_ZERO(&global_readfds);
-    FD_ZERO(&global_writefds);
+    global_readfds = xcalloc(FD_MASK_BYTES, howmany(Squid_MaxFD, FD_MASK_BITS));
+    global_writefds = xcalloc(FD_MASK_BYTES, howmany(Squid_MaxFD, FD_MASK_BITS));
+    current_readfds = xcalloc(FD_MASK_BYTES, howmany(Squid_MaxFD, FD_MASK_BITS));
+    current_writefds = xcalloc(FD_MASK_BYTES, howmany(Squid_MaxFD, FD_MASK_BITS));
     nreadfds = nwritefds = 0;
 }
 
@@ -574,9 +71,13 @@
     debug(5, 1) ("Using select for the IO loop\n");
 }
 
-void
-comm_select_shutdown(void)
+static void
+do_select_shutdown()
 {
+    safe_free(global_readfds);
+    safe_free(global_writefds);
+    safe_free(current_readfds);
+    safe_free(current_writefds);
 }
 
 void
@@ -585,165 +86,73 @@
     storeAppendPrintf(sentry, "\tIO loop method:                     select\n");
 }
 
-/*
- * examine_select - debug routine.
- *
- * I spend the day chasing this core dump that occurs when both the client
- * and the server side of a cache fetch simultaneoulsy abort the
- * connection.  While I haven't really studied the code to figure out how
- * it happens, the snippet below may prevent the cache from exitting:
- * 
- * Call this from where the select loop fails.
- */
-static int
-examine_select(fd_set * readfds, fd_set * writefds)
-{
-    int fd = 0;
-    fd_set read_x;
-    fd_set write_x;
-    struct timeval tv;
-    close_handler *ch = NULL;
-    fde *F = NULL;
-    struct stat sb;
-    debug(5, 0) ("examine_select: Examining open file descriptors...\n");
-    for (fd = 0; fd < Squid_MaxFD; fd++) {
-	FD_ZERO(&read_x);
-	FD_ZERO(&write_x);
-	tv.tv_sec = tv.tv_usec = 0;
-	if (FD_ISSET(fd, readfds))
-	    FD_SET(fd, &read_x);
-	else if (FD_ISSET(fd, writefds))
-	    FD_SET(fd, &write_x);
-	else
-	    continue;
-	statCounter.syscalls.selects++;
-	errno = 0;
-	if (!fstat(fd, &sb)) {
-	    debug(5, 5) ("FD %d is valid.\n", fd);
-	    continue;
-	}
-	F = &fd_table[fd];
-	debug(5, 0) ("FD %d: %s\n", fd, xstrerror());
-	debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
-	debug(5, 0) ("FD %d is a %s called '%s'\n",
-	    fd,
-	    fdTypeStr[F->type],
-	    F->desc);
-	debug(5, 0) ("tmout:%p read:%p write:%p\n",
-	    F->timeout_handler,
-	    F->read_handler,
-	    F->write_handler);
-	for (ch = F->close_handler; ch; ch = ch->next)
-	    debug(5, 0) (" close handler: %p\n", ch->handler);
-	if (F->close_handler) {
-	    commCallCloseHandlers(fd);
-	} else if (F->timeout_handler) {
-	    debug(5, 0) ("examine_select: Calling Timeout Handler\n");
-	    F->timeout_handler(fd, F->timeout_data);
-	}
-	F->close_handler = NULL;
-	F->timeout_handler = NULL;
-	F->read_handler = NULL;
-	F->write_handler = NULL;
-	FD_CLR(fd, readfds);
-	FD_CLR(fd, writefds);
-    }
-    return 0;
-}
-
-static void
-commIncomingStats(StoreEntry * sentry)
-{
-    StatCounters *f = &statCounter;
-    storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n",
-	incoming_icp_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n",
-	incoming_dns_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n",
-	incoming_http_interval >> INCOMING_FACTOR);
-    storeAppendPrintf(sentry, "\n");
-    storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n");
-    storeAppendPrintf(sentry, "ICP Messages handled per comm_select_icp_incoming() call:\n");
-    statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper);
-    storeAppendPrintf(sentry, "DNS Messages handled per comm_select_dns_incoming() call:\n");
-    statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper);
-    storeAppendPrintf(sentry, "HTTP Messages handled per comm_select_http_incoming() call:\n");
-    statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper);
-}
-
 void
 commSetEvents(int fd, int need_read, int need_write)
 {
-    if (need_read && !FD_ISSET(fd, &global_readfds)) {
-	FD_SET(fd, &global_readfds);
+    if (need_read && !FD_ISSET(fd, global_readfds)) {
+	FD_SET(fd, global_readfds);
 	nreadfds++;
-    } else if (!need_read && FD_ISSET(fd, &global_readfds)) {
-	FD_CLR(fd, &global_readfds);
+    } else if (!need_read && FD_ISSET(fd, global_readfds)) {
+	FD_CLR(fd, global_readfds);
 	nreadfds--;
-    }
-    if (need_write && !FD_ISSET(fd, &global_writefds)) {
-	FD_SET(fd, &global_writefds);
+    } if (need_write && !FD_ISSET(fd, global_writefds)) {
+	FD_SET(fd, global_writefds);
 	nwritefds++;
-    } else if (!need_write && FD_ISSET(fd, &global_writefds)) {
-	FD_CLR(fd, &global_writefds);
+    } else if (!need_write && FD_ISSET(fd, global_writefds)) {
+	FD_CLR(fd, global_writefds);
 	nwritefds--;
     }
 }
 
 static int
-commDeferRead(int fd)
+do_comm_select(int msec)
 {
-    fde *F = &fd_table[fd];
-    if (F->defer_check == NULL)
-	return 0;
-    return F->defer_check(fd, F->defer_data);
-}
+    int num;
+    struct timeval tv;
+    fd_mask *rfdsp = (fd_mask *) current_readfds;
+    fd_mask *wfdsp = (fd_mask *) current_writefds;
+    int maxindex = howmany(Biggest_FD + 1, FD_MASK_BITS);
+    int fd_set_size = maxindex * FD_MASK_BYTES;
+    int j;
 
-static void
-checkTimeouts(void)
-{
-    int fd;
-    fde *F = NULL;
-    PF *callback;
-    for (fd = 0; fd <= Biggest_FD; fd++) {
-	F = &fd_table[fd];
-	if (!F->flags.open)
-	    continue;
-	if (F->timeout == 0)
-	    continue;
-	if (F->timeout > squid_curtime)
-	    continue;
-	debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
-	if (F->timeout_handler) {
-	    debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
-	    callback = F->timeout_handler;
-	    F->timeout_handler = NULL;
-	    callback(fd, F->timeout_data);
-	} else {
-	    debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
-	    comm_close(fd);
-	}
+    if (nreadfds + nwritefds == 0) {
+	assert(shutting_down);
+	return COMM_SHUTDOWN;
     }
-}
 
+    memcpy(current_readfds, global_readfds, fd_set_size);
+    memcpy(current_writefds, global_writefds, fd_set_size);
+    tv.tv_sec = msec / 1000;
+    tv.tv_usec = (msec % 1000) * 1000;
+    statCounter.syscalls.selects++;
+    num = select(Biggest_FD + 1, current_readfds, current_writefds, NULL, &tv);
+    statCounter.select_loops++;
 
-/* Called by async-io or diskd to speed up the polling */
-void
-comm_quick_poll_required(void)
-{
-    MAX_POLL_TIME = 10;
-}
+    if (num < 0) {
+	getCurrentTime();
+	if (ignoreErrno(errno))
+	    return COMM_OK;
 
-/* Defer reads from this fd */
-void
-commDeferFD(int fd)
-{
-    /* Not implemented */
-}
+	debug(5, 1) ("comm_select: select failure: %s\n", xstrerror());
+	return COMM_ERROR;
+    }
+    statHistCount(&statCounter.select_fds_hist, num);
 
-/* Resume reading from the given fd */
-void
-commResumeFD(int fd)
-{
-    /* Not implemented */
+    if (num == 0)
+	return COMM_TIMEOUT;
+
+    for (j = 0; j < maxindex; j++) {
+	int k;
+	fd_mask tmask = rfdsp[j] | wfdsp[j];
+	for (k = 0; tmask && k < FD_MASK_BITS; k++) {
+	    int fd;
+	    if (!EBIT_TEST(tmask, k))
+		continue;
+	    /* Found a set bit */
+	    fd = (j * FD_MASK_BITS) + k;
+	    EBIT_CLR(tmask, k);	/* this will be done */
+	    comm_call_handlers(fd, FD_ISSET(fd, current_readfds), FD_ISSET(fd, current_writefds));
+	}
+    }
+    return COMM_OK;
 }
Index: squid/src/comm_select_win32.c
diff -u squid/src/comm_select_win32.c:1.3 squid/src/comm_select_win32.c:1.1.10.2
--- squid/src/comm_select_win32.c:1.3	Tue Aug 15 13:52:37 2006
+++ squid/src/comm_select_win32.c	Mon Oct 23 03:42:10 2006
@@ -762,6 +762,9 @@
 	    comm_close(fd);
 	}
     }
+#if DELAY_POOLS
+    delayPoolsUpdate(NULL);	/* driven from here: this patch drops the eventAdd() scheduling in delay_pools.c */
+#endif /* DELAY_POOLS */
 }
 
 
Index: squid/src/delay_pools.c
diff -u squid/src/delay_pools.c:1.15 squid/src/delay_pools.c:1.14.2.2
--- squid/src/delay_pools.c:1.15	Wed Aug  2 14:51:41 2006
+++ squid/src/delay_pools.c	Mon Oct 23 03:42:13 2006
@@ -145,7 +145,6 @@
 	return;
     delay_data = xcalloc(pools, sizeof(*delay_data));
     memory_used += pools * sizeof(*delay_data);
-    eventAdd("delayPoolsUpdate", delayPoolsUpdate, NULL, 1.0, 1);
     delay_id_ptr_hash = hash_create(delayIdPtrHashCmp, 256, delayIdPtrHash);
 }
 
@@ -164,7 +163,6 @@
 {
     if (!delay_id_ptr_hash)
 	return;
-    eventDelete(delayPoolsUpdate, NULL);
     safe_free(delay_data);
     memory_used -= pools * sizeof(*delay_data);
     hashFreeItems(delay_id_ptr_hash, delayIdZero);
@@ -546,7 +544,6 @@
     unsigned char class;
     if (!Config.Delay.pools)
 	return;
-    eventAdd("delayPoolsUpdate", delayPoolsUpdate, NULL, 1.0, 1);
     if (incr < 1)
 	return;
     delay_pools_last_update = squid_curtime;
@@ -682,6 +679,8 @@
     if (jmax >= 0 && jmax < (int) *read_sz) {
 	if (jmax == 0)
 	    jmax = 1;
+	if (jmax > 1460)
+	    jmax = 1460;
 	*read_sz = (size_t) jmax;
     }
     return d;
Index: squid/src/fd.c
diff -u squid/src/fd.c:1.17 squid/src/fd.c:1.15.2.2
--- squid/src/fd.c:1.17	Sat Sep  9 05:50:47 2006
+++ squid/src/fd.c	Sat Sep 30 17:59:27 2006
@@ -92,6 +92,10 @@
     debug(51, 3) ("fd_close FD %d %s\n", fd, F->desc);
     commSetEvents(fd, 0, 0);
     F->flags.open = 0;
+#if DELAY_POOLS
+    if (F->slow_id)
+	commRemoveSlow(fd);
+#endif
     fdUpdateBiggest(fd, 0);
     Number_FD--;
     memset(F, '\0', sizeof(fde));
@@ -197,6 +201,13 @@
 }
 
 void
+fd_init(void)
+{
+    fd_table = xcalloc(Squid_MaxFD, sizeof(fde));
+    /* XXX account fd_table */
+}
+
+void
 fdFreeMemory(void)
 {
     safe_free(fd_table);
Index: squid/src/main.c
diff -u squid/src/main.c:1.71 squid/src/main.c:1.61.2.2
--- squid/src/main.c:1.71	Sat Sep  2 07:17:45 2006
+++ squid/src/main.c	Sat Sep 30 16:45:31 2006
@@ -671,8 +671,6 @@
 #endif
 
     debug_log = stderr;
-    if (FD_SETSIZE < Squid_MaxFD)
-	Squid_MaxFD = FD_SETSIZE;
 
 #ifdef _SQUID_WIN32_
     if ((WIN32_init_err = WIN32_Subsystem_Init(&argc, &argv)))
Index: squid/src/protos.h
diff -u squid/src/protos.h:1.128 squid/src/protos.h:1.107.2.2
--- squid/src/protos.h:1.128	Fri Sep 22 03:53:21 2006
+++ squid/src/protos.h	Sat Sep 30 17:59:27 2006
@@ -166,6 +166,7 @@
 extern void commDeferFD(int fd);
 extern void commResumeFD(int fd);
 extern void commSetSelect(int, unsigned int, PF *, void *, time_t);
+extern void commRemoveSlow(int fd);
 extern void comm_add_close_handler(int fd, PF *, void *);
 extern void comm_remove_close_handler(int fd, PF *, void *);
 extern int comm_udp_sendto(int, const struct sockaddr_in *, int, const void *, int);
@@ -258,6 +259,7 @@
 extern void eventFreeMemory(void);
 extern int eventFind(EVH *, void *);
 
+extern void fd_init(void);
 extern void fd_close(int fd);
 extern void fd_open(int fd, unsigned int type, const char *);
 extern void fd_note(int fd, const char *);
Index: squid/src/structs.h
diff -u squid/src/structs.h:1.133 squid/src/structs.h:1.112.2.2
--- squid/src/structs.h:1.133	Sat Sep 30 14:52:28 2006
+++ squid/src/structs.h	Sat Sep 30 17:59:27 2006
@@ -921,6 +921,9 @@
 	long handle;
     } win32;
 #endif
+#if DELAY_POOLS
+    int slow_id;
+#endif
 };
 
 struct _fileMap {
