当前位置：首页 > news >正文

Linux sch_fq公平队列FQ流分类与credit机制

news 2026/6/23 1:03:05

Linux sch_fq公平队列FQ流分类与credit机制

Fair Queue（FQ）qdisc位于net/sched/sch_fq.c，核心目标是每个流（flow）一个FIFO队列，按轮询（DRR, Deficit Round Robin）方式调度，保证各流间的公平性，同时支持 pacing 和 per-flow 限速。

FQ的流分类基于sk_buff的hash值，默认使用内核计算的skb->hash。fq_classify函数将报文分配到对应流：

static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct fq_flow *f;
u32 hash;

if (skb->protocol == htons(ETH_P_IP) ||
skb->protocol == htons(ETH_P_IPV6)) {
hash = skb_get_hash_perturb(skb, q->perturbation);
} else {
hash = skb->hash;
if (!hash)
hash = (u32)(unsigned long)skb_dst(skb) ^
skb->sk_hash;
}

hash = reciprocal_scale(hash, FQ_HASH_SIZE);
f = &q->flows[hash];

if (f->flowchain.prev == NULL) {
struct fq_flow *new_flow;

new_flow = fq_find_fitting_flow(q, skb, hash);
if (new_flow)
return new_flow;

if (f->qlen > 1 || f->stats.stoll > q->flow_refill)
return &q->internal;

f->fq_tin = skb_find_txq(skb, q->flow_max_rate ?
&q->rate_limiting_struct : NULL);
}

return f;
}

每个flow通过定时器进行pacing控制。fq_dequeue是核心调度函数：

static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct fq_flow *f;
struct sk_buff *skb;
u32 now = ktime_get_ns();
s64 credit;

f = list_first_entry_or_null(&q->new_flows, struct fq_flow,
flowchain);
if (!f) {
f = list_first_entry_or_null(&q->old_flows, struct fq_flow,
flowchain);
if (!f)
return NULL;
}

if (f->time_next_packet > now) {
if (!q->timer_active) {
q->timer_active = true;
hrtimer_start(&q->fq_timer,
ns_to_ktime(f->time_next_packet - now),
HRTIMER_MODE_REL_PINNED);
}
return NULL;
}

credit = f->credit;
skb = f->head;

if (skb) {
credit -= skb->len;

if (credit < 0 && !q->rate_enable) {
return NULL;
}

__skb_unlink(skb, &f->queue);
sch->q.qlen--;

f->credit = credit;
if (f->credit <= 0) {
f->credit += q->quantum;
list_move_tail(&f->flowchain, &q->old_flows);
}
}

if (f->credit > 0 && f->qlen > 0)
list_move_tail(&f->flowchain, &q->new_flows);
else if (f->qlen == 0)
list_del_init(&f->flowchain);

if (q->rate_enable) {
f->time_next_packet = now +
max_t(u64, q->flow_max_rate ?
div64_u64(skb->len * 1000ULL * NSEC_PER_USEC,
q->flow_max_rate) : 0,
skb->len * q->pacing_divider);
}

return skb;
}

credit机制是DRR的核心。每个flow拥有一个credit计数器，初始值为quantum。每次从该flow出队一个报文，减去其长度。当credit变为负数（或零以下）时，该flow被移到old_flows链表，并补充一个quantum的credit。调度器优先服务new_flows链表中的flow，只有new_flows为空时才处理old_flows。这种设计确保新产生的active flow不会被old_flows中的flow饿死。

quantum参数的默认值在fq_change函数中设定：

static int fq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct tc_fq_qopt *ctl = nla_data(opt);

if (ctl->quantum)
q->quantum = ctl->quantum;
else
q->quantum = 2 * psched_mtu(qdisc_dev(sch));

if (ctl->initial_quantum)
q->initial_quantum = ctl->initial_quantum;
else
q->initial_quantum = 0;

if (ctl->maxrate) {
q->flow_max_rate = ctl->maxrate;
q->rate_enable = 1;
}

if (ctl->pacing) {
q->pacing_divisor = ctl->pacing;
}
}

在credit计算中，fq_dequeue对每个flow出队后重新计算credit：

static void fq_flow_add_to_list(struct fq_sched_data *q,
struct fq_flow *f)
{
if (f->credit <= 0) {
f->credit += q->quantum;
list_add_tail(&f->flowchain, &q->old_flows);
} else {
list_add_tail(&f->flowchain, &q->new_flows);
}
}

flow从new_flows转移到old_flows的条件是credit耗尽，转移后补充quantum credit。但如果补充后credit仍为正，则留在new_flows继续参与调度。这种设计保证credit用尽的flow暂时让出调度机会，实现按字节严格公平。

per-flow的pacing依靠fq->time_next_packet字段，hrtimer到期后才会调用fq_dequeue，控制报文发送速率。timer回调函数为fq_timer：

static enum hrtimer_restart fq_timer(struct hrtimer *timer)
{
struct fq_sched_data *q = container_of(timer, struct fq_sched_data,
fq_timer);
struct Qdisc *sch = q->sch;
struct net_device *dev = qdisc_dev(sch);

q->timer_active = false;
__netif_schedule(sch);

return HRTIMER_NORESTART;
}

timer仅触发一次调度，fq_dequeue在必要时重新启新timer。这种defer机制将pacing粒度控制在纳秒级，适合高速网卡下精确流量整形。