epoll源码分析---sys_epoll_wait()函数
一、sys_epoll_wait()函数
源码及分析如下所示:
/* * @head:已经就绪的文件列表 * @priv:用来存储已经就绪的文件 */static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, void *priv){struct ep_send_events_data *esed = priv;int eventcnt;unsigned int revents;struct epitem *epi;struct epoll_event __user *uevent;/* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop because ep_scan_ready_list() is * holding "mtx" during this call. */for (eventcnt = 0, uevent = esed->events; !list_empty(head) && eventcnt < esed->maxevents;) {epi = list_first_entry(head, struct epitem, rdllink);list_del_init(&epi->rdllink);/* * 调用文件的poll函数有两个作用,一是在文件的唤醒 * 队列上注册回调函数,二是返回文件当前的事件状 * 态,如果第二个参数为NULL,则只是查看文件当前 * 状态。 */revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &epi->event.events;/* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, ep_scan_ready_list() * is holding "mtx", so no operations coming from userspace * can change the item. */if (revents) {/* * 向用户内存传值失败时,将当前epitem实例重新放回 * 到链表中,从这里也可以看出,在处理失败后,head指向的 * 链表(对应ep_scan_ready_list()中的临时变量txlist)中 * 有可能会没有完全处理完,因此在ep_scan_ready_list()中 * 需要下面的语句 * list_splice(&txlist, &ep->rdllist); * 来将未处理的事件重新放回到eventpoll文件的就绪队列中。 */if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) {list_add(&epi->rdllink, head);/* * 如果此时已经获取了部分事件,则返回已经获取的事件个数, * 否则返回EFAULT错误。 */return eventcnt ? eventcnt : -EFAULT;}eventcnt++;uevent++;if (epi->event.events & EPOLLONESHOT)epi->event.events &= EP_PRIVATE_BITS;/* * 如果是触发方式不是边缘触发(Edge Trigger),而是水平 * 触发(Level Trigger),需要将当前的epitem实例添加回 * 链表中,下次读取事件时会再次上报。 */else if (!(epi->event.events & EPOLLET)) {/* * If this file has been added with Level * Trigger mode, we need to insert back inside * the ready list, so that the next call to * epoll_wait() will check again the events * availability. At this point, noone can insert * into ep->rdllist besides us. The epoll_ctl() * callers are locked out by * ep_scan_ready_list() holding "mtx" and the * poll callback will queue them in ep->ovflist. */list_add_tail(&epi->rdllink, &ep->rdllist);}}}return eventcnt;}