当前位置: 首页 > news >正文

linux内存管理-页面回收之内核线程 kswapd (四)

上面章节梳理了kswapd内核线程的整体逻辑,本节将介绍其中用到的细节函数。

1、shrink_active_list函数

首先看当不活跃LRU的页面数量少于活跃LRU的页面数量的情况。

shrink_active_list()函数扫描活跃LRU链表,看是否有页面可以迁移到不活跃LRU链表中。

/*
 * shrink_active_list() does not reclaim pages itself. It picks pages off the
 * tail of an active LRU list that may no longer be active and sorts them by
 * how they were referenced: pages that should stay active go to l_active,
 * the rest are demoted to l_inactive and left for later reclaim.
 */
static void shrink_active_list(unsigned long nr_to_scan /* how many pages to scan on the active LRU in this pass */,
			       struct lruvec *lruvec,
			       struct scan_control *sc,
			       enum lru_list lru)
{
	unsigned long nr_taken;
	unsigned long nr_scanned;
	unsigned long vm_flags;
	/* pages that were "snipped off" the LRU */
	LIST_HEAD(l_hold);	/* The pages which were snipped off */
	/* pages that remain active */
	LIST_HEAD(l_active);
	/* pages demoted to inactive */
	LIST_HEAD(l_inactive);
	struct page *page;
	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
	unsigned long nr_rotated = 0;
	isolate_mode_t isolate_mode = 0;
	/* non-zero when lru is a file LRU; also used as an index/offset below */
	int file = is_file_lru(lru);
	struct zone *zone = lruvec_zone(lruvec);

	/* drain the per-cpu LRU caches */
	lru_add_drain();

	/* build the isolation mode from the scan_control restrictions */
	if (!sc->may_unmap)	/* unmapping is not allowed */
		isolate_mode |= ISOLATE_UNMAPPED;	/* isolate unmapped pages only */
	if (!sc->may_writepage)	/* writeback is not allowed */
		isolate_mode |= ISOLATE_CLEAN;	/* isolate clean pages only */

	spin_lock_irq(&zone->lru_lock);

	/*
	 * Take at most nr_to_scan pages off the tail of the active LRU and put
	 * them on l_hold. isolate_lru_pages() is the low-level helper that
	 * detaches pages from an LRU list for later reclaim, migration or
	 * compaction.
	 */
	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
				     &nr_scanned, sc, isolate_mode, lru);
	if (global_reclaim(sc))
		__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);

	/* update the statistics */
	reclaim_stat->recent_scanned[file] += nr_taken;

	__count_zone_vm_events(PGREFILL, zone, nr_scanned);
	/* the taken pages leave this LRU's counter and are counted as isolated */
	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	while (!list_empty(&l_hold)) {	/* walk the isolated pages */
		/*
		 * Allow rescheduling inside this potentially long loop, to
		 * avoid RCU/scheduling latency problems.
		 */
		cond_resched();
		page = lru_to_page(&l_hold);
		list_del(&page->lru);	/* unlink the page from l_hold */

		if (unlikely(!page_evictable(page))) {
			/* unevictable page (e.g. mlock()ed): put it back on the LRU */
			putback_lru_page(page);
			continue;
		}

		/*
		 * Too many buffer_heads in the system: try to release the
		 * page's private data (presumably so the page can be freed
		 * more easily later on).
		 */
		if (unlikely(buffer_heads_over_limit)) {
			if (page_has_private(page) && trylock_page(page)) {
				/* re-check now that the page lock is held */
				if (page_has_private(page))
					try_to_release_page(page, 0);
				unlock_page(page);
			}
		}

		/* check whether the page has been referenced */
		if (page_referenced(page, 0, sc->target_mem_cgroup,
				    &vm_flags)) {
			nr_rotated += hpage_nr_pages(page);
			/*
			 * Identify referenced, file-backed active pages and
			 * give them one more trip around the active list. So
			 * that executable code get better chances to stay in
			 * memory under moderate memory pressure.  Anon pages
			 * are not likely to be evicted by use-once streaming
			 * IO, plus JVM can create lots of anon VM_EXEC pages,
			 * so we ignore them here.
			 */
			if ((vm_flags & VM_EXEC /* executable mapping */) && page_is_file_cache(page) /* file cache page */) {
				/* still active: keep frequently executed code from being reclaimed by mistake */
				list_add(&page->lru, &l_active);
				continue;
			}
		}

		/*
		 * Why do recently referenced pages (other than the executable
		 * file pages handled above) go to the inactive list instead of
		 * staying active? Background from the article author: moving
		 * every referenced page back to the active list does not scale.
		 * On a machine with a lot of memory, by the time free memory
		 * runs out nearly every page has been referenced, so there is
		 * neither time to scan the whole LRU nor any useful information
		 * in re-setting the referenced bits. Since Linux 2.6.28 the
		 * active-list scan therefore demotes the pages; only the
		 * hardware accessed bit is cleared here (by page_referenced()),
		 * and a page that is referenced again is expected to be
		 * promoted back when the inactive list is scanned
		 * (NOTE(review): that promotion path is not part of this
		 * function).
		 */
		ClearPageActive(page);	/* we are de-activating */
		list_add(&page->lru, &l_inactive);
	}

	/*
	 * Move pages back to the lru list.
	 */
	spin_lock_irq(&zone->lru_lock);
	/*
	 * Count referenced pages from currently used mappings as rotated,
	 * even though only some of them are actually re-activated.  This
	 * helps balance scan pressure between file and anonymous pages in
	 * get_scan_count.
	 */
	reclaim_stat->recent_rotated[file] += nr_rotated;

	/*
	 * Put the pages back: l_active returns to the active LRU, l_inactive
	 * goes to the matching inactive LRU (lru - LRU_ACTIVE).
	 */
	move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
	move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	/*
	 * Release whatever is on l_hold at this point. The loop above emptied
	 * l_hold, but it is passed to both move_active_pages_to_lru() calls;
	 * presumably they use it to hand back pages that must be freed --
	 * TODO confirm in move_active_pages_to_lru().
	 */
	mem_cgroup_uncharge_list(&l_hold);
	free_hot_cold_page_list(&l_hold, true);
}

首先看下lru_add_drain函数,将pagevec缓存中的页面同步到对应的lru链表中,其中涉及需要添加到lru中页面缓存区lru_add_pvec,

缓存待移动到非活跃lru链表尾部的页面的缓存区lru_rotate_pvecs(注意rotate并不是把页面移到活跃链表),缓存待从活跃lru降级到非活跃lru的页面的缓存区lru_deactivate_pvecs,以及缓存激活操作的缓存区activate_page_pvecs。

这个函数内部的操作非常丰富。

/*
 * Drain the per-cpu LRU pagevecs of the CPU this code is running on.
 * get_cpu()/put_cpu() bracket the call so the CPU number stays valid while
 * lru_add_drain_cpu() runs.
 */
void lru_add_drain(void)
{
	lru_add_drain_cpu(get_cpu());
	put_cpu();
}

/*
 * Flush the pages that the given CPU has queued in its per-cpu pagevecs but
 * not yet applied to the real LRU lists.
 */
void lru_add_drain_cpu(int cpu)
{
	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);

	/* put the pages queued in lru_add_pvec onto their LRU lists */
	if (pagevec_count(pvec))
		__pagevec_lru_add(pvec);

	/*
	 * lru_rotate_pvecs holds pages waiting to be rotated.
	 * pagevec_move_tail() moves those that are on an LRU, not active and
	 * evictable to the TAIL of their inactive list; it does not promote
	 * them to the active list.
	 */
	pvec = &per_cpu(lru_rotate_pvecs, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_irq_save(flags);
		pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}

	/*
	 * lru_deactivate_pvecs holds pages queued for deactivation;
	 * lru_deactivate_fn() moves each eligible one onto the inactive list.
	 */
	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);

	/* pages queued for activation */
	activate_page_drain(cpu);
}

/* Add every page in pvec to an LRU list via __pagevec_lru_add_fn(). */
void __pagevec_lru_add(struct pagevec *pvec)
{
	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
}

pagevec_lru_move_fn函数在前面lru部分详细描述过,这里不再赘述。

/* Per-page callback for __pagevec_lru_add(): put one page on its LRU list. */
static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
				 void *arg)
{
	int file = page_is_file_cache(page);	/* is this a file page? */
	int active = PageActive(page);
	enum lru_list lru = page_lru(page);	/* the LRU list type this page belongs on */

	/*
	 * The page must not be on an LRU list yet (PageLRU clear); adding it
	 * a second time would be a bug.
	 */
	VM_BUG_ON_PAGE(PageLRU(page), page);

	SetPageLRU(page);	/* set PG_lru in page->flags */
	/* link the page into lruvec->lists[lru] */
	add_page_to_lru_list(page, lruvec, lru);
	/* update the statistics; an active page is also counted as rotated */
	update_page_reclaim_stat(lruvec, file, active);
	trace_mm_lru_insertion(page, lru);
}

/*
 * Bump the reclaim statistics of lruvec for one page: recent_scanned[file]
 * always, recent_rotated[file] only when rotated is non-zero.
 */
static void update_page_reclaim_stat(struct lruvec *lruvec,
				     int file, int rotated)
{
	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;

	reclaim_stat->recent_scanned[file]++;
	if (rotated)
		reclaim_stat->recent_rotated[file]++;
}

/*
 * Rotate the pages in pvec to the tail of their inactive lists and add the
 * number of pages actually moved to the PGROTATED event counter.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
	int pgmoved = 0;

	/* move the queued inactive pages to the tail of the inactive list */
	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
	__count_vm_events(PGROTATED, pgmoved);
}

/*
 * Per-page callback for pagevec_move_tail(). arg points to an int that counts
 * the pages that were moved.
 */
static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
				 void *arg)
{
	int *pgmoved = arg;

	if (PageLRU(page) /* page is on an LRU */ && !PageActive(page) /* inactive */ && !PageUnevictable(page) /* evictable */) {
		enum lru_list lru = page_lru_base_type(page);	/* LRU type */
		list_move_tail(&page->lru, &lruvec->lists[lru]);	/* move to the tail of the inactive list */
		(*pgmoved)++;
	}
}

/*
 * Per-page callback used when draining lru_deactivate_pvecs: move the page to
 * the inactive list, unless it is off-LRU, unevictable or still mapped.
 */
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
			      void *arg)
{
	int lru, file;
	bool active;

	if (!PageLRU(page))	/* not on an LRU list: nothing to do */
		return;

	if (PageUnevictable(page))	/* unevictable page: leave it alone */
		return;

	/* Some processes are using the page */
	if (page_mapped(page))
		return;

	active = PageActive(page);	/* is the page currently active? */
	file = page_is_file_cache(page);	/* is this a file page? */
	lru = page_lru_base_type(page);	/* base (inactive) LRU type of the page */

	/* unlink from the list the page is on now: lru + active picks the active list for an active page */
	del_page_from_lru_list(page, lruvec, lru + active);
	ClearPageActive(page);
	ClearPageReferenced(page);
	/* link the page into the inactive list */
	add_page_to_lru_list(page, lruvec, lru);

	if (PageWriteback(page) || PageDirty(page)) {
		/*
		 * PG_reclaim could be raced with end_page_writeback
		 * It can make readahead confusing.  But race window
		 * is _really_ small and  it's non-critical problem.
		 */
		SetPageReclaim(page);
	} else {
		/*
		 * The page's writeback ended while it sat in the pagevec.
		 * We move the page to the tail of inactive.
		 */
		/* move to the tail */
		list_move_tail(&page->lru, &lruvec->lists[lru]);
		__count_vm_event(PGROTATED);
	}

	if (active)
		__count_vm_event(PGDEACTIVATE);
	update_page_reclaim_stat(lruvec, file, 0);
}

/* Apply the activations queued in the given CPU's activate_page_pvecs. */
static void activate_page_drain(int cpu)
{
	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, __activate_page, NULL);
}

static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);

per-cpu 变量activate_page_pvecs缓存需要激活的页面,如下,

/*
 * Queue page for activation. The page is only added to this CPU's
 * activate_page_pvecs here; the move to the active list happens in
 * __activate_page() when the pagevec is flushed.
 */
void activate_page(struct page *page)
{
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

		/* presumably takes a page reference on behalf of the pagevec -- TODO confirm */
		page_cache_get(page);
		/* presumably pagevec_add() returns 0 once the pagevec is full, so flush it now -- TODO confirm */
		if (!pagevec_add(pvec, page))
			pagevec_lru_move_fn(pvec, __activate_page, NULL);
		put_cpu_var(activate_page_pvecs);
	}
}

activate_page激活的页面实际上并没有立马放入lru对应的活跃链表中而是放在了activate_page_pvecs缓存中。

/* Activate a page: unlink it from its inactive LRU list and link it into the matching active LRU list. */
static void __activate_page(struct page *page, struct lruvec *lruvec,
			    void *arg)
{
	if (PageLRU(page) && !PageActive(page) /* not active yet */ && !PageUnevictable(page) /* evictable */) {
		int file = page_is_file_cache(page);
		int lru = page_lru_base_type(page);	/* inactive LRU list type */

		/* unlink the page from the inactive list */
		del_page_from_lru_list(page, lruvec, lru);
		/* set the PG_active flag */
		SetPageActive(page);
		lru += LRU_ACTIVE;
		/* link the page into the active list */
		add_page_to_lru_list(page, lruvec, lru);
		trace_mm_lru_activate(page);

		__count_vm_event(PGACTIVATE);
		update_page_reclaim_stat(lruvec, file, 1);
	}
}

下面的isolate_lru_pages执行隔离操作:根据隔离条件,把lru类型对应的lru链表上的页面隔离到dst链表上。

分离页面有如下4种类型:

  • /* Isolate clean file 分离干净的页面*/
  • #define ISOLATE_CLEAN ((__force isolate_mode_t)0x1)
  • /* Isolate unmapped file 分离没有映射的页面*/
  • #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
  • /* Isolate for asynchronous migration 分离用于异步迁移的页面*/
  • #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
  • /* Isolate unevictable pages 分离不可回收的页面*/
  • #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
/*
 * Within the given scan budget, take pages off the specified LRU list and put
 * them on dst for later reclaim or migration.
 *
 * Returns the number of pages taken (counted with hpage_nr_pages()); the
 * number of list entries examined is stored in *nr_scanned.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct lruvec *lruvec, struct list_head *dst /* list that receives the isolated pages */,
		unsigned long *nr_scanned /* out: number of pages actually scanned */, struct scan_control *sc,
		isolate_mode_t mode, enum lru_list lru)
{
	struct list_head *src = &lruvec->lists[lru];	/* the LRU list to scan */
	unsigned long nr_taken = 0;
	unsigned long scan;

	/* walk the pages on the LRU list until the budget is used up or the list is empty */
	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
		struct page *page;
		int nr_pages;

		/* take pages in reverse order, from the tail of the LRU (the oldest entries) */
		page = lru_to_page(src);
		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON_PAGE(!PageLRU(page), page);

		/* __isolate_lru_page() decides whether this page may be isolated under mode */
		switch (__isolate_lru_page(page, mode)) {
		case 0:	/* the page can be isolated */
			nr_pages = hpage_nr_pages(page);
			mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
			list_move(&page->lru, dst);	/* move it onto dst */
			nr_taken += nr_pages;
			break;

		case -EBUSY:
			/* else it is being freed elsewhere */
			/*
			 * The page cannot be isolated under the current mode
			 * (under writeback, dirty or mapped), or its refcount
			 * was already zero. Re-queue it on src and keep
			 * scanning; presumably list_move() puts it at the end
			 * opposite to the one being scanned, so it is not
			 * picked again immediately -- TODO confirm.
			 */
			list_move(&page->lru, src);
			continue;

		default:
			BUG();
		}
	}

	*nr_scanned = scan;
	/* ftrace tracepoint, for performance analysis and debugging */
	trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
				    nr_taken, mode, is_file_lru(lru));
	return nr_taken;
}

/*
 * Low-level isolation check: decide whether page may be isolated under mode.
 *
 * Returns 0 when the page may be isolated; in that case a reference has been
 * taken and PageLRU has been cleared. Returns -EINVAL when the page is not on
 * an LRU, or is unevictable and mode lacks ISOLATE_UNEVICTABLE. Returns -EBUSY
 * when the mode restrictions reject the page or its refcount is already zero.
 */
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
	int ret = -EINVAL;

	/* Only take pages on the LRU. */
	if (!PageLRU(page))	/* only pages on an LRU can be isolated */
		return ret;

	/* Compaction should not handle unevictable pages but CMA can do so */
	if (PageUnevictable(page) /* page is marked unevictable */ && !(mode & ISOLATE_UNEVICTABLE) /* caller did not ask for unevictable pages */)
		return ret;

	ret = -EBUSY;

	/*
	 * To minimise LRU disruption, the caller can indicate that it only
	 * wants to isolate pages it will be able to operate on without
	 * blocking - clean pages for the most part.
	 *
	 * ISOLATE_CLEAN means that only clean pages should be isolated. This
	 * is used by reclaim when it cannot write to backing storage
	 *
	 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
	 * that it is possible to migrate without blocking
	 */
	/* both modes share the same constraint: the caller must not block on the page */
	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
		/* All the caller can do on PageWriteback is block */
		if (PageWriteback(page))	/* page is being written back: would block */
			return ret;

		if (PageDirty(page)) {
			struct address_space *mapping;

			/* ISOLATE_CLEAN means only clean pages */
			if (mode & ISOLATE_CLEAN)	/* clean pages only: reject a dirty page */
				return ret;

			/*
			 * Only pages without mappings or that have a
			 * ->migratepage callback are possible to migrate
			 * without blocking
			 */
			mapping = page_mapping(page);
			if (mapping && !mapping->a_ops->migratepage)	/* cannot be migrated asynchronously: reject */
				return ret;
		}
	}

	if ((mode & ISOLATE_UNMAPPED /* unmapped pages only */) && page_mapped(page) /* page is still mapped by a process */)
		return ret;

	if (likely(get_page_unless_zero(page))) {
		/*
		 * Be careful not to clear PageLRU until after we're
		 * sure the page is not being freed elsewhere -- the
		 * page release code relies on it.
		 */
		/*
		 * The reference is taken first and PageLRU is cleared only
		 * afterwards, to guard against a concurrent release of the
		 * page (the article names release_pages()).
		 */
		ClearPageLRU(page);
		ret = 0;
	}

	return ret;
}
http://www.jsqmd.com/news/637844/

相关文章:

  • 一键体验Phi-4-mini-reasoning:快速解决数学、逻辑与分析问题
  • 机器学习工程师的日常:挑战与解决
  • vLLM-v0.17.1一文详解:前缀缓存+推测性解码降低首token延迟
  • 2026年好上锡的实芯焊锡丝/助焊接焊锡丝/免清洗焊锡丝多家厂家对比分析 - 品牌宣传支持者
  • Qwen3.5-2B部署教程:阿里云ACK集群中Qwen3.5-2B服务化封装与API网关对接
  • PP-DocLayoutV3助力学术出版:LaTeX论文手稿的自动排版分析
  • Qwen3.5-4B模型HEIC图片批量转换JPG格式的自动化脚本生成
  • 从零搭建机票预订系统:UML建模+Java EE实战避坑指南
  • AIAgent可观测性形同虚设?SITS2026标准提案:嵌入式Trace ID注入、意图日志Schema、决策溯源图谱——构建Agent世界的APM新范式
  • 吐血整理:新手小白学习人工智能,推荐哪些入门书籍和课程?适合零基础的有哪些?
  • Serilog:从结构化日志认知到 .NET 工程落地炙
  • 我在 Cursor 里接入了 Claude Code,三种方式实测告诉你哪个最好用
  • 智元远征A3完成全球首批客户交付
  • 零基础玩转扣子平台:集成谷歌Nano Banana模型实现智能图像生成
  • MogFace效果惊艳:高清图片人脸检测,绿色框标注清晰可见
  • Qwen3-8B工具调用快速上手:5分钟学会构建智能应用
  • **发散创新:基于Python与Whisper的实时语音识别系统实战解析**在人工智能飞速发展的今天,**语
  • 从零开始:建立企业级Abaqus许可证管理制度(含模板)
  • 终极语言学习革命:如何通过肌肉记忆训练重塑你的编程与英语能力?
  • 全网最全:新手小白学习人工智能,推荐哪些入门书籍和课程?适合零基础的有哪些?
  • UDOP-large入门指南:零基础部署,快速实现英文文档智能理解
  • YOLOv11前瞻探讨:Phi-4-mini-reasoning解读目标检测技术演进趋势
  • Z-Image-Turbo实战测评:生成速度、图片质量、中文支持全面解析
  • 软技能训练营:说服力与谈判术——软件测试从业者的进阶指南
  • 推荐几款适合送人的红茶,体面又有心意
  • 从领域驱动到本体论:AI 时代的架构方法论变了独
  • AIGlasses_for_navigation与Matlab联合仿真:机器人视觉导航算法验证环境搭建
  • 手把手教你用IndexTTS-2-LLM:快速搭建多语种语音合成服务
  • DeepSeek-R1-Distill-Qwen-7B推理效果实测:Ollama部署后的真实问答案例
  • SPI协议极简指南:5分钟搞懂CPOL和CPHA的四种组合模式