Linux中处理需要传输的IP报文流程
本文主要讲解Linux中处理需要传输的IP报文的流程,使用的内核版本是2.6.32.27。
为了方便理解,本文采用整体流程图加伪代码的方式对Linux中处理需要传输的IP报文的流程进行讲解,希望可以对大家有所帮助。阅读本文需要读者对C语言有一定的了解。
首先从IP层之上的传输层看起,看看传输层是如何把报文交给IP层发送的。
//-----------------------------------------------------------------------------------------------/*ipv4.c中注册的让上层协议使用的接口*/static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {.queue_xmit = ip_queue_xmit,};/*将dccp_ipv4_af_ops注册到协议中*/static int dccp_v4_init_sock(struct sock *sk){inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;}/*TCP数据报文发送函数*/static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask){const struct inet_connection_sock *icsk = inet_csk(sk);/*使用ip_queue_xmit发送数据报文*/err = icsk->icsk_af_ops->queue_xmit(skb, 0);}//-----------------------------------------------------------------------------------------------int ip_queue_xmit(struct sk_buff *skb, int ipfragok){struct sock *sk = skb->sk;struct inet_sock *inet = inet_sk(sk);struct ip_options *opt = inet->opt;struct rtable *rt;struct iphdr *iph;/*检查套接字结构中sk->dst中是否有一个指针指向路由缓存中的某个入口项 *如果有,再检查这个指针是否有效,由于套接字的所有包都去往同一个目标 *地址,因此路由就存放在skb->_skb_dst中,内容为dst_entry结构 */rt = skb_rtable(skb);if (rt != NULL)goto packet_routed;rt = (struct rtable *)__sk_dst_check(sk, 0);{if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL){sk->sk_dst_cache = NULL;dst_release(dst);return NULL;}}/*如果尚未设置路由,那么使用ip_route_output_flow进行路由选路*/if (rt == NULL) {//......if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))goto no_route;}//......packet_routed:/*填充IP报头*///.....iph->ttl = ip_select_ttl(inet, &rt->u.dst);iph->protocol = sk->sk_protocol;iph->saddr = rt->rt_src;iph->daddr = rt->rt_dst;/*填充IP选项*/if (opt && opt->optlen) {iph->ihl += opt->optlen >> 2;ip_options_build(skb, opt, inet->daddr, rt, 0);}//......return ip_local_out(skb);no_route://.....}int ip_local_out(struct sk_buff *skb){int err;err = __ip_local_out(skb);if (likely(err == 1))err = dst_output(skb);return err;}int __ip_local_out(struct sk_buff *skb){struct iphdr *iph = ip_hdr(skb);iph->tot_len = htons(skb->len);ip_send_check(iph);/*进入 NF_INET_LOCAL_OUT 的序列钩子进行处理,处理之后放入dst_output中处理*/return nf_hook(PF_INET, 
NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output);}static inline int dst_output(struct sk_buff *skb){/*调用dst_entry中注册的output函数,IP单播也就是ip_output函数*/return skb_dst(skb)->output(skb);}/*在__mkroute_output中曾经对output和input进行过注册*/static int __mkroute_output(struct rtable **result, struct fib_result *res, const struct flowi *fl, const struct flowi *oldflp, struct net_device *dev_out, unsigned flags){struct rtable *rth;rth->u.dst.output=ip_output;if (flags & RTCF_LOCAL) {rth->u.dst.input = ip_local_deliver;}if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)){rth->u.dst.output = ip_mc_output;}if (res->type == RTN_MULTICAST){rth->u.dst.input = ip_mr_input;rth->u.dst.output = ip_mc_output;}}}/*IPV4单播*/int ip_output(struct sk_buff *skb){struct net_device *dev = skb_dst(skb)->dev;skb->dev = dev;skb->protocol = htons(ETH_P_IP);/*经过 NF_INET_POST_ROUTING 处理链后,进入ip_finish_output处理*/return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED));}static int ip_finish_output(struct sk_buff *skb){/*IP分片后,进入ip_finish_output2处理*/if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))return ip_fragment(skb, ip_finish_output2);elsereturn ip_finish_output2(skb);}static inline int ip_finish_output2(struct sk_buff *skb){/*如果没有二层头,启用ARP处理*/if (dst->hh)return neigh_hh_output(dst->hh, skb);/*如果有二层头进行处理,侧使用dst->neighbour->output也就是 dev_queue_xmit*/else if (dst->neighbour)return dst->neighbour->output(skb);}/*dev_queue_xmit在ARP中的注册过程如下*/static const struct neigh_ops arp_hh_ops = {.family =AF_INET,.output =neigh_resolve_output,.hh_output =dev_queue_xmit,};static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, __be16 protocol){struct hh_cache*hh;//......if (n->nud_state & NUD_CONNECTED)hh->hh_output = n->ops->hh_output; /*也就是dev_queue_xmit*/elsehh->hh_output = n->ops->output;//......}
关于处理流程的整体架构图,请参见我的上一篇博客
《Linux内核IP层的报文处理流程--从网卡接收的报文处理流程》。
关于二层是如何继续处理报文并发送的,请参考博客
《Linux内核数据包的发送传输》。
希望大家批评指正