linux Qdisc实现

概念

qdisc(queuing discipline): 在协议栈最终调用设备驱动发送skb前,会调用qdisc模块来做QoS,之后再调用实际的发送函数
linux的默认采用的FIFO的算法: tcp有tsq来实现流直接的公平,但是其他协议比如udp没有实现。

dev_queue_xmit

dev_queue_xmit()->__dev_queue_xmit()->__dev_xmit_skb()
最终调用qdisc的enqueue函数,把数据把加入到qdisc中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
int dev_queue_xmit(struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
{
...
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rcu_read_lock_bh();
skb_update_prio(skb);
qdisc_pkt_len_init(skb); //算上gso分段后的总大小
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
else
skb_dst_force(skb);
txq = netdev_pick_tx(dev, skb, accel_priv); //选择设备的发送队列
q = rcu_dereference_bh(txq->qdisc);
if (q->enqueue) {
rc = __dev_xmit_skb(skb, q, dev, txq);
goto out;
}
//这个设备没有qdisc,可能是loopback等虚拟设备
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
Really, it is unlikely that netif_tx_lock protection is necessary
here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
Check this and shot the lock. It is not prone from deadlocks.
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
skb = validate_xmit_skb(skb, dev);
if (!skb)
goto out;
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
}
}
HARD_TX_UNLOCK(dev, txq);
net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
dev->name);
} else {
/* Recursion is detected! It is possible,
* unfortunately
*/
recursion_alert:
net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
dev->name);
}
}
rc = -ENETDOWN;
rcu_read_unlock_bh();
atomic_long_inc(&dev->tx_dropped);
kfree_skb_list(skb);
return rc;
out:
rcu_read_unlock_bh();
return rc;
}
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
struct sk_buff *to_free = NULL;
bool contended;
int rc;
qdisc_calculate_pkt_len(skb, q);
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
* This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
if (unlikely(contended))
spin_lock(&q->busylock); //正在运行则busylock
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
__qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) { // 如果qdisc中skb队列的长度为0,并且可以忽略qdisc规则(pfifo_fast设置有这个标志), 尝试直接发送
/*
* This is a work-conserving queue; there are no old skbs
* waiting to be sent out; and the qdisc is not running -
* xmit the skb directly.
*/
qdisc_bstats_update(q, skb);
if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) { //直接发送这个skb
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
}
__qdisc_run(q); //直接发完发现qdisc中还有,则调用dequeue后发送
} else
qdisc_run_end(q); //直接发送发现qdisc中没有了,结束
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; //排入qdisc队列,默认pfifo_fast_ops算法,pfifo_fast_enqueue
if (qdisc_run_begin(q)) { //之前qdisc在running,现在停了(有其他人在dequeue这个qdisc)
if (unlikely(contended)) { //释放之前的锁
spin_unlock(&q->busylock);
contended = false;
}
__qdisc_run(q); //出队并发送
}
}
spin_unlock(root_lock);
if (unlikely(to_free))
kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
}

enqueue

默认的pfifo_fast,根据skb的优先级,排入qdisc的3个队列中的一个。 如果qidsc总的队列长度(ifconfig中的txqueuelen)满了,则丢弃

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/* 3-band FIFO queue: old style, but should be a bit faster than
generic prio+fifo combination.
*/
#define PFIFO_FAST_BANDS 3
/*
* Private data for a pfifo_fast scheduler containing:
* - queues for the three band
* - bitmap indicating which of the bands contain skbs
*/
struct pfifo_fast_priv {
u32 bitmap;
struct qdisc_skb_head q[PFIFO_FAST_BANDS];
};
static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
int band)
{
return priv->q + band;
}
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { //没有达到队列最大值
int band = prio2band[skb->priority & TC_PRIO_MAX]; //根据优先级计算排入的队列,总共3个队列
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct qdisc_skb_head *list = band2list(priv, band);
priv->bitmap |= (1 << band); //标记这个队列包含数据
qdisc->q.qlen++;
return __qdisc_enqueue_tail(skb, qdisc, list); //插入list尾部
}
return qdisc_drop(skb, qdisc, to_free); //添加skb到to_free列表中,交给上层丢弃
}

dequeue

在__dev_xmit_skb中,如果没有人占有锁,则会调用__qdisc_run来dequeue并发送

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
void __qdisc_run(struct Qdisc *q)
{
int quota = weight_p;
int packets;
while (qdisc_restart(q, &packets)) { //一次运行配额为64个包, 返回非0表示队列中还有数据
/*
* Ordered by possible occurrence: Postpone processing if
* 1. we've exceeded packet quota
* 2. another process needs the CPU;
*/
quota -= packets;
if (quota <= 0 || need_resched()) {
__netif_schedule(q);
break;
}
}
qdisc_run_end(q); //结束本次发送
}
/*
* NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
* running seqcount guarantees only one CPU can process
* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
* this queue.
*
* netif_tx_lock serializes accesses to device driver.
*
* qdisc_lock(q) and netif_tx_lock are mutually exclusive,
* if one is grabbed, another must be free.
*
* Note, that this procedure can be called by a watchdog timer
*
* Returns to the caller:
* 0 - queue is empty or throttled.
* >0 - queue is not empty.
*
*/
static inline int qdisc_restart(struct Qdisc *q, int *packets)
{
struct netdev_queue *txq;
struct net_device *dev;
spinlock_t *root_lock;
struct sk_buff *skb;
bool validate;
/* Dequeue packet */
skb = dequeue_skb(q, &validate, packets);
if (unlikely(!skb))
return 0;
root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); //直接发送skb
}
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
* A requeued skb (via q->gso_skb) can also be a SKB list.
*/
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
int *packets)
{
struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
*packets = 1;
if (unlikely(skb)) {
/* skb in gso_skb were already validated */
*validate = false;
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
} else
skb = NULL;
return skb;
}
*validate = true;
skb = q->skb_bad_txq;
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
q->skb_bad_txq = NULL;
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
goto bulk;
}
return NULL;
}
if (!(q->flags & TCQ_F_ONETXQUEUE) ||
!netif_xmit_frozen_or_stopped(txq))
skb = q->dequeue(q); //pfifo_fast_dequeue fq_dequeue
if (skb) {
bulk: //继续dequeue
if (qdisc_may_bulk(q))
try_bulk_dequeue_skb(q, skb, txq, packets); //满足bql限制
else
try_bulk_dequeue_skb_slow(q, skb, packets); //保证dequeue后的skb发到同一个队列
}
return skb;
}
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
struct sk_buff *skb,
int *packets)
{
int mapping = skb_get_queue_mapping(skb);
struct sk_buff *nskb;
int cnt = 0;
do {
nskb = q->dequeue(q);
if (!nskb)
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
q->skb_bad_txq = nskb;
qdisc_qstats_backlog_inc(q, nskb);
q->q.qlen++;
break;
}
skb->next = nskb;
skb = nskb;
} while (++cnt < 8);
(*packets) += cnt;
skb->next = NULL;
}
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
int band = bitmap2band[priv->bitmap];
if (likely(band >= 0)) { //有数据
struct qdisc_skb_head *qh = band2list(priv, band);
struct sk_buff *skb = __qdisc_dequeue_head(qh);
if (likely(skb != NULL)) {
qdisc_qstats_backlog_dec(qdisc, skb);
qdisc_bstats_update(qdisc, skb);
}
qdisc->q.qlen--;
if (qh->qlen == 0)
priv->bitmap &= ~(1 << band);
return skb;
}
return NULL;
}

__netif_reschedule

__netif_reschedule会把qdisc放入softnet_data的output_queue中,然后推迟到软中断中处理dequeue

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
static void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
local_irq_save(flags);
sd = this_cpu_ptr(&softnet_data);
q->next_sched = NULL;
*sd->output_queue_tailp = q;
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
void __netif_schedule(struct Qdisc *q)
{
if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
__netif_reschedule(q);
}
static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
...
if (sd->output_queue) {
struct Qdisc *head;
local_irq_disable();
head = sd->output_queue;
sd->output_queue = NULL;
sd->output_queue_tailp = &sd->output_queue;
local_irq_enable();
while (head) {
struct Qdisc *q = head;
spinlock_t *root_lock;
head = head->next_sched;
root_lock = qdisc_lock(q);
spin_lock(root_lock);
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q); // qdisc dequeue发送
spin_unlock(root_lock);
}
}
}

qdisc初始化

pktsched_init注册了几个系统算法,register_qdisc只是添加算法到一个全局的链表中
注册设备驱动的时候会调用register_netdevice(), register_netdevice()会调用dev_init_scheduler来初始化默认的qidsc为noop_qdisc,
noop_qdisc算法什么也不做,enqueue函数为丢弃数据包。
当ifconfig up网卡后,会调用dev_open()最终分配默认的qdisc算法给队列

初始化noop_qdisc算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
static int __init pktsched_init(void)
{
int err;
err = register_pernet_subsys(&psched_net_ops);
if (err) {
pr_err("pktsched_init: "
"cannot initialize per netns operations\n");
return err;
}
register_qdisc(&pfifo_fast_ops);
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
NULL);
rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
NULL);
return 0;
}
subsys_initcall(pktsched_init);
int register_netdevice(struct net_device *dev)
{
...
dev_init_scheduler(dev);
...
}
void dev_init_scheduler(struct net_device *dev)
{
dev->qdisc = &noop_qdisc;
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
__qdisc_drop(skb, to_free);
return NET_XMIT_CN;
}
static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
return NULL;
}
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
.id = "noop",
.priv_size = 0,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.peek = noop_dequeue,
.owner = THIS_MODULE,
};
static struct netdev_queue noop_netdev_queue = {
.qdisc = &noop_qdisc,
.qdisc_sleeping = &noop_qdisc,
};
struct Qdisc noop_qdisc = {
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.flags = TCQ_F_BUILTIN,
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
.running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);

初始化默认qdisc算法

需要注意的是,对于多队列设备,会先调用mq_qdisc_ops算法来做一次代理
在mq_init中为每个队列分配默认的pfifo_fast_ops算法保持在mq_qdisc_ops的private结构中
然后马上调用mq_attach, 把这些分配好的qdisc,设置到设备队列的qdisc_sleeping中。
最终在dev_activate中把qdisc_sleeping中的qdisc算法,通过transition_one_qdisc赋值到netdev_queue->qdisc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
int dev_open(struct net_device *dev)
{
...
dev_activate(dev);
...
}
void dev_activate(struct net_device *dev)
{
int need_watchdog;
/* No queueing discipline is attached to device;
* create default one for devices, which need queueing
* and noqueue_qdisc for virtual interfaces
*/
if (dev->qdisc == &noop_qdisc)
attach_default_qdiscs(dev); //设置默认qdisc算法
if (!netif_carrier_ok(dev))
/* Delay activation until next carrier-on event */
return;
need_watchdog = 0;
netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
if (dev_ingress_queue(dev))
transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
if (need_watchdog) {
netif_trans_update(dev);
dev_watchdog_up(dev);
}
}
static void transition_one_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_need_watchdog)
{
struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
int *need_watchdog_p = _need_watchdog;
if (!(new_qdisc->flags & TCQ_F_BUILTIN))
clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
if (need_watchdog_p) {
dev_queue->trans_start = 0;
*need_watchdog_p = 1;
}
}
static void attach_default_qdiscs(struct net_device *dev)
{
struct netdev_queue *txq;
struct Qdisc *qdisc;
txq = netdev_get_tx_queue(dev, 0);
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
atomic_inc(&dev->qdisc->refcnt);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); //调用mq_init
if (qdisc) {
dev->qdisc = qdisc;
qdisc->ops->attach(qdisc); //mq_attach
}
}
#ifdef CONFIG_NET_SCHED
if (dev->qdisc)
qdisc_hash_add(dev->qdisc);
#endif
}
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
unsigned int parentid)
{
struct Qdisc *sch;
if (!try_module_get(ops->owner))
return NULL;
sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch)) {
module_put(ops->owner);
return NULL;
}
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0) //mq_init
return sch;
qdisc_destroy(sch);
return NULL;
}
struct mq_sched {
struct Qdisc **qdiscs;
};
static int mq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
struct netdev_queue *dev_queue;
struct Qdisc *qdisc;
unsigned int ntx;
if (sch->parent != TC_H_ROOT)
return -EOPNOTSUPP;
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
/* pre-allocate qdiscs, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
if (priv->qdiscs == NULL)
return -ENOMEM;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), //为每个队列分配默认的qdisc算法
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
return 0;
err:
mq_destroy(sch);
return -ENOMEM;
}
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
return ntx < dev->real_num_tx_queues ?
default_qdisc_ops : &pfifo_fast_ops;
}
static void mq_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
struct Qdisc *qdisc, *old;
unsigned int ntx;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = priv->qdiscs[ntx];
old = dev_graft_qdisc(qdisc->dev_queue, qdisc); //设置qdisc到设备队列的qdisc_sleeping中
if (old)
qdisc_destroy(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
qdisc_hash_add(qdisc);
#endif
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
}
/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc)
{
struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
spinlock_t *root_lock;
root_lock = qdisc_lock(oqdisc);
spin_lock_bh(root_lock);
/* Prune old scheduler */
if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
qdisc_reset(oqdisc);
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
dev_queue->qdisc_sleeping = qdisc;
rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
spin_unlock_bh(root_lock);
return oqdisc;
}