MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
devpoll.c
1 /*
2  * Copyright 2000-2004 Niels Provos <provos@citi.umich.edu>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_time.h>
#endif
#include <sys/queue.h>
#include <sys/devpoll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <limits.h>

#include "event.h"
#include "event-internal.h"
#include "evsignal.h"
#include "log.h"
53 
/*
 * Due to limitations in the devpoll interface, we need to keep track of
 * all file descriptors ourselves.  One of these records exists per
 * descriptor slot in the backend's table.
 */
struct evdevpoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
61 
/* State kept by the /dev/poll backend for one event base. */
struct devpollop {
	struct evdevpoll *fds;		/* per-descriptor event table, indexed by fd */
	int nfds;			/* number of slots in fds */
	struct pollfd *events;		/* result buffer handed to DP_POLL */
	int nevents;			/* capacity of events; also bounds changes */
	int dpfd;			/* descriptor of the opened /dev/poll device */
	struct pollfd *changes;		/* queued updates not yet written to dpfd */
	int nchanges;			/* number of entries in use in changes */
};
71 
/* Backend entry points; each one is defined later in this file. */
static void *devpoll_init(struct event_base *base);
static int devpoll_add(void *arg, struct event *ev);
static int devpoll_del(void *arg, struct event *ev);
static int devpoll_dispatch(struct event_base *base, void *arg,
    struct timeval *tv);
static void devpoll_dealloc(struct event_base *base, void *arg);
77 
78 const struct eventop devpollops = {
79  "devpoll",
80  devpoll_init,
81  devpoll_add,
82  devpoll_del,
83  devpoll_dispatch,
84  devpoll_dealloc,
85  1 /* need reinit */
86 };
87 
/*
 * Default table size; devpoll_init() uses it unless getrlimit() reports a
 * finite RLIMIT_NOFILE soft limit.
 */
#define NEVENT	32000
89 
90 static int
91 devpoll_commit(struct devpollop *devpollop)
92 {
93  /*
94  * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
95  * Write is limited to 2GB of data, until it will fail.
96  */
97  if (pwrite(devpollop->dpfd, devpollop->changes,
98  sizeof(struct pollfd) * devpollop->nchanges, 0) == -1)
99  return(-1);
100 
101  devpollop->nchanges = 0;
102  return(0);
103 }
104 
105 static int
106 devpoll_queue(struct devpollop *devpollop, int fd, int events) {
107  struct pollfd *pfd;
108 
109  if (devpollop->nchanges >= devpollop->nevents) {
110  /*
111  * Change buffer is full, must commit it to /dev/poll before
112  * adding more
113  */
114  if (devpoll_commit(devpollop) != 0)
115  return(-1);
116  }
117 
118  pfd = &devpollop->changes[devpollop->nchanges++];
119  pfd->fd = fd;
120  pfd->events = events;
121  pfd->revents = 0;
122 
123  return(0);
124 }
125 
126 static void *
127 devpoll_init(struct event_base *base)
128 {
129  int dpfd, nfiles = NEVENT;
130  struct rlimit rl;
131  struct devpollop *devpollop;
132 
133  /* Disable devpoll when this environment variable is set */
134  if (getenv("EVENT_NODEVPOLL"))
135  return (NULL);
136 
137  if (!(devpollop = calloc(1, sizeof(struct devpollop))))
138  return (NULL);
139 
140  if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
141  rl.rlim_cur != RLIM_INFINITY)
142  nfiles = rl.rlim_cur;
143 
144  /* Initialize the kernel queue */
145  if ((dpfd = open("/dev/poll", O_RDWR)) == -1) {
146  event_warn("open: /dev/poll");
147  free(devpollop);
148  return (NULL);
149  }
150 
151  devpollop->dpfd = dpfd;
152 
153  /* Initialize fields */
154  devpollop->events = calloc(nfiles, sizeof(struct pollfd));
155  if (devpollop->events == NULL) {
156  free(devpollop);
157  close(dpfd);
158  return (NULL);
159  }
160  devpollop->nevents = nfiles;
161 
162  devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll));
163  if (devpollop->fds == NULL) {
164  free(devpollop->events);
165  free(devpollop);
166  close(dpfd);
167  return (NULL);
168  }
169  devpollop->nfds = nfiles;
170 
171  devpollop->changes = calloc(nfiles, sizeof(struct pollfd));
172  if (devpollop->changes == NULL) {
173  free(devpollop->fds);
174  free(devpollop->events);
175  free(devpollop);
176  close(dpfd);
177  return (NULL);
178  }
179 
180  evsignal_init(base);
181 
182  return (devpollop);
183 }
184 
185 static int
186 devpoll_recalc(struct event_base *base, void *arg, int max)
187 {
188  struct devpollop *devpollop = arg;
189 
190  if (max >= devpollop->nfds) {
191  struct evdevpoll *fds;
192  int nfds;
193 
194  nfds = devpollop->nfds;
195  while (nfds <= max)
196  nfds <<= 1;
197 
198  fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll));
199  if (fds == NULL) {
200  event_warn("realloc");
201  return (-1);
202  }
203  devpollop->fds = fds;
204  memset(fds + devpollop->nfds, 0,
205  (nfds - devpollop->nfds) * sizeof(struct evdevpoll));
206  devpollop->nfds = nfds;
207  }
208 
209  return (0);
210 }
211 
212 static int
213 devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
214 {
215  struct devpollop *devpollop = arg;
216  struct pollfd *events = devpollop->events;
217  struct dvpoll dvp;
218  struct evdevpoll *evdp;
219  int i, res, timeout = -1;
220 
221  if (devpollop->nchanges)
222  devpoll_commit(devpollop);
223 
224  if (tv != NULL)
225  timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
226 
227  dvp.dp_fds = devpollop->events;
228  dvp.dp_nfds = devpollop->nevents;
229  dvp.dp_timeout = timeout;
230 
231  res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
232 
233  if (res == -1) {
234  if (errno != EINTR) {
235  event_warn("ioctl: DP_POLL");
236  return (-1);
237  }
238 
239  evsignal_process(base);
240  return (0);
241  } else if (base->sig.evsignal_caught) {
242  evsignal_process(base);
243  }
244 
245  event_debug(("%s: devpoll_wait reports %d", __func__, res));
246 
247  for (i = 0; i < res; i++) {
248  int which = 0;
249  int what = events[i].revents;
250  struct event *evread = NULL, *evwrite = NULL;
251 
252  assert(events[i].fd < devpollop->nfds);
253  evdp = &devpollop->fds[events[i].fd];
254 
255  if (what & POLLHUP)
256  what |= POLLIN | POLLOUT;
257  else if (what & POLLERR)
258  what |= POLLIN | POLLOUT;
259 
260  if (what & POLLIN) {
261  evread = evdp->evread;
262  which |= EV_READ;
263  }
264 
265  if (what & POLLOUT) {
266  evwrite = evdp->evwrite;
267  which |= EV_WRITE;
268  }
269 
270  if (!which)
271  continue;
272 
273  if (evread != NULL && !(evread->ev_events & EV_PERSIST))
274  event_del(evread);
275  if (evwrite != NULL && evwrite != evread &&
276  !(evwrite->ev_events & EV_PERSIST))
277  event_del(evwrite);
278 
279  if (evread != NULL)
280  event_active(evread, EV_READ, 1);
281  if (evwrite != NULL)
282  event_active(evwrite, EV_WRITE, 1);
283  }
284 
285  return (0);
286 }
287 
288 
289 static int
290 devpoll_add(void *arg, struct event *ev)
291 {
292  struct devpollop *devpollop = arg;
293  struct evdevpoll *evdp;
294  int fd, events;
295 
296  if (ev->ev_events & EV_SIGNAL)
297  return (evsignal_add(ev));
298 
299  fd = ev->ev_fd;
300  if (fd >= devpollop->nfds) {
301  /* Extend the file descriptor array as necessary */
302  if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1)
303  return (-1);
304  }
305  evdp = &devpollop->fds[fd];
306 
307  /*
308  * It's not necessary to OR the existing read/write events that we
309  * are currently interested in with the new event we are adding.
310  * The /dev/poll driver ORs any new events with the existing events
311  * that it has cached for the fd.
312  */
313 
314  events = 0;
315  if (ev->ev_events & EV_READ) {
316  if (evdp->evread && evdp->evread != ev) {
317  /* There is already a different read event registered */
318  return(-1);
319  }
320  events |= POLLIN;
321  }
322 
323  if (ev->ev_events & EV_WRITE) {
324  if (evdp->evwrite && evdp->evwrite != ev) {
325  /* There is already a different write event registered */
326  return(-1);
327  }
328  events |= POLLOUT;
329  }
330 
331  if (devpoll_queue(devpollop, fd, events) != 0)
332  return(-1);
333 
334  /* Update events responsible */
335  if (ev->ev_events & EV_READ)
336  evdp->evread = ev;
337  if (ev->ev_events & EV_WRITE)
338  evdp->evwrite = ev;
339 
340  return (0);
341 }
342 
343 static int
344 devpoll_del(void *arg, struct event *ev)
345 {
346  struct devpollop *devpollop = arg;
347  struct evdevpoll *evdp;
348  int fd, events;
349  int needwritedelete = 1, needreaddelete = 1;
350 
351  if (ev->ev_events & EV_SIGNAL)
352  return (evsignal_del(ev));
353 
354  fd = ev->ev_fd;
355  if (fd >= devpollop->nfds)
356  return (0);
357  evdp = &devpollop->fds[fd];
358 
359  events = 0;
360  if (ev->ev_events & EV_READ)
361  events |= POLLIN;
362  if (ev->ev_events & EV_WRITE)
363  events |= POLLOUT;
364 
365  /*
366  * The only way to remove an fd from the /dev/poll monitored set is
367  * to use POLLREMOVE by itself. This removes ALL events for the fd
368  * provided so if we care about two events and are only removing one
369  * we must re-add the other event after POLLREMOVE.
370  */
371 
372  if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
373  return(-1);
374 
375  if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
376  /*
377  * We're not deleting all events, so we must resubmit the
378  * event that we are still interested in if one exists.
379  */
380 
381  if ((events & POLLIN) && evdp->evwrite != NULL) {
382  /* Deleting read, still care about write */
383  devpoll_queue(devpollop, fd, POLLOUT);
384  needwritedelete = 0;
385  } else if ((events & POLLOUT) && evdp->evread != NULL) {
386  /* Deleting write, still care about read */
387  devpoll_queue(devpollop, fd, POLLIN);
388  needreaddelete = 0;
389  }
390  }
391 
392  if (needreaddelete)
393  evdp->evread = NULL;
394  if (needwritedelete)
395  evdp->evwrite = NULL;
396 
397  return (0);
398 }
399 
400 static void
401 devpoll_dealloc(struct event_base *base, void *arg)
402 {
403  struct devpollop *devpollop = arg;
404 
405  evsignal_dealloc(base);
406  if (devpollop->fds)
407  free(devpollop->fds);
408  if (devpollop->events)
409  free(devpollop->events);
410  if (devpollop->changes)
411  free(devpollop->changes);
412  if (devpollop->dpfd >= 0)
413  close(devpollop->dpfd);
414 
415  memset(devpollop, 0, sizeof(struct devpollop));
416  free(devpollop);
417 }