#author("2025-09-11T20:33:37+09:00","default:guest","guest") #author("2025-09-11T20:35:14+09:00","default:guest","guest") *参照元 [#l7b0ff8f] #backlinks *説明 [#nc61627d] -パス: [[linux-5.15/mm/vmscan.c]] -FIXME: これは何? --説明 **引数 [#r15fc0a7] - -struct list_head *page_list -- --[[linux-5.15/list_head]] -struct pglist_data *pgdat -- --[[linux-5.15/pglist_data]] -struct scan_control *sc -- --[[linux-5.15/scan_control]] -struct reclaim_stat *stat -- --[[linux-5.15/reclaim_stat]] -bool ignore_references -- **返り値 [#c84e17f4] - -- -unsigned int --回収したページ数。 **参考 [#y00aff75] *実装 [#la422937] /* * shrink_page_list() returns the number of reclaimed pages */ static unsigned int shrink_page_list(struct list_head *page_list, struct pglist_data *pgdat, struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); LIST_HEAD(demote_pages); unsigned int nr_reclaimed = 0; unsigned int pgactivate = 0; bool do_demote_pass; memset(stat, 0, sizeof(*stat)); cond_resched(); do_demote_pass = can_demote(pgdat->node_id, sc); - --[[linux-5.15/LIST_HEAD()]] --[[linux-5.15/cond_resched()]] --[[linux-5.15/can_demote()]] retry: while (!list_empty(page_list)) { struct address_space *mapping; struct page *page; enum page_references references = PAGEREF_RECLAIM; bool dirty, writeback, may_enter_fs; unsigned int nr_pages; cond_resched(); - --[[linux-5.15/list_empty()]] --[[linux-5.15/address_space]] --[[linux-5.15/page]] --[[linux-5.15/page_references]] --[[linux-5.15/cond_resched()]] page = lru_to_page(page_list); list_del(&page->lru); - --[[linux-5.15/lru_to_page()]] --[[linux-5.15/list_del()]] if (!trylock_page(page)) goto keep; VM_BUG_ON_PAGE(PageActive(page), page); nr_pages = compound_nr(page); - --[[linux-5.15/trylock_page()]] --[[linux-5.15/VM_BUG_ON_PAGE()]] --[[linux-5.15/PageActive()]] --[[linux-5.15/compound_nr()]] /* Account the number of base pages even though THP */ sc->nr_scanned += nr_pages; if (unlikely(!page_evictable(page))) goto activate_locked; if (!sc->may_unmap && page_mapped(page)) goto keep_locked; may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); - --[[linux-5.15/unlikely()]] --[[linux-5.15/page_evictable()]] --[[linux-5.15/page_mapped()]] --[[linux-5.15/PageSwapCache()]] /* * The number of dirty pages determines if a node is marked * reclaim_congested which affects wait_iff_congested. kswapd * will stall and start writing pages if the tail of the LRU * is all dirty unqueued pages. */ page_check_dirty_writeback(page, &dirty, &writeback); if (dirty || writeback) stat->nr_dirty++; if (dirty && !writeback) stat->nr_unqueued_dirty++; - --[[linux-5.15/page_check_dirty_writeback()]] /* * Treat this page as congested if the underlying BDI is or if * pages are cycling through the LRU so quickly that the * pages marked for immediate reclaim are making it to the * end of the LRU a second time. */ mapping = page_mapping(page); if (((dirty || writeback) && mapping && inode_write_congested(mapping->host)) || (writeback && PageReclaim(page))) stat->nr_congested++; - --[[linux-5.15/page_mapping()]] --[[linux-5.15/inode_write_congested()]] --[[linux-5.15/PageReclaim()]] /* * If a page at the tail of the LRU is under writeback, there * are three cases to consider. * * 1) If reclaim is encountering an excessive number of pages * under writeback and this page is both under writeback and * PageReclaim then it indicates that pages are being queued * for IO but are being recycled through the LRU before the * IO can complete. 
 retry:
         while (!list_empty(page_list)) {
                 struct address_space *mapping;
                 struct page *page;
                 enum page_references references = PAGEREF_RECLAIM;
                 bool dirty, writeback, may_enter_fs;
                 unsigned int nr_pages;
 
                 cond_resched();
-
--[[linux-5.15/list_empty()]]
--[[linux-5.15/address_space]]
--[[linux-5.15/page]]
--[[linux-5.15/page_references]]
--[[linux-5.15/cond_resched()]]
                 page = lru_to_page(page_list);
                 list_del(&page->lru);
-
--[[linux-5.15/lru_to_page()]]
--[[linux-5.15/list_del()]]
                 if (!trylock_page(page))
                         goto keep;
 
                 VM_BUG_ON_PAGE(PageActive(page), page);
 
                 nr_pages = compound_nr(page);
-
--[[linux-5.15/trylock_page()]]
--[[linux-5.15/VM_BUG_ON_PAGE()]]
--[[linux-5.15/PageActive()]]
--[[linux-5.15/compound_nr()]]
                 /* Account the number of base pages even though THP */
                 sc->nr_scanned += nr_pages;
 
                 if (unlikely(!page_evictable(page)))
                         goto activate_locked;
 
                 if (!sc->may_unmap && page_mapped(page))
                         goto keep_locked;
 
                 may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
                         (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
-
--[[linux-5.15/unlikely()]]
--[[linux-5.15/page_evictable()]]
--[[linux-5.15/page_mapped()]]
--[[linux-5.15/PageSwapCache()]]
                 /*
                  * The number of dirty pages determines if a node is marked
                  * reclaim_congested which affects wait_iff_congested. kswapd
                  * will stall and start writing pages if the tail of the LRU
                  * is all dirty unqueued pages.
                  */
                 page_check_dirty_writeback(page, &dirty, &writeback);
                 if (dirty || writeback)
                         stat->nr_dirty++;
 
                 if (dirty && !writeback)
                         stat->nr_unqueued_dirty++;
-
--[[linux-5.15/page_check_dirty_writeback()]]
                 /*
                  * Treat this page as congested if the underlying BDI is or if
                  * pages are cycling through the LRU so quickly that the
                  * pages marked for immediate reclaim are making it to the
                  * end of the LRU a second time.
                  */
                 mapping = page_mapping(page);
                 if (((dirty || writeback) && mapping &&
                      inode_write_congested(mapping->host)) ||
                     (writeback && PageReclaim(page)))
                         stat->nr_congested++;
-
--[[linux-5.15/page_mapping()]]
--[[linux-5.15/inode_write_congested()]]
--[[linux-5.15/PageReclaim()]]
                 /*
                  * If a page at the tail of the LRU is under writeback, there
                  * are three cases to consider.
                  *
                  * 1) If reclaim is encountering an excessive number of pages
                  *    under writeback and this page is both under writeback and
                  *    PageReclaim then it indicates that pages are being queued
                  *    for IO but are being recycled through the LRU before the
                  *    IO can complete. Waiting on the page itself risks an
                  *    indefinite stall if it is impossible to writeback the
                  *    page due to IO error or disconnected storage so instead
                  *    note that the LRU is being scanned too quickly and the
                  *    caller can stall after page list has been processed.
                  *
                  * 2) Global or new memcg reclaim encounters a page that is
                  *    not marked for immediate reclaim, or the caller does not
                  *    have __GFP_FS (or __GFP_IO if it's simply going to swap,
                  *    not to fs). In this case mark the page for immediate
                  *    reclaim and continue scanning.
                  *
                  *    Require may_enter_fs because we would wait on fs, which
                  *    may not have submitted IO yet. And the loop driver might
                  *    enter reclaim, and deadlock if it waits on a page for
                  *    which it is needed to do the write (loop masks off
                  *    __GFP_IO|__GFP_FS for this reason); but more thought
                  *    would probably show more reasons.
                  *
                  * 3) Legacy memcg encounters a page that is already marked
                  *    PageReclaim. memcg does not have any dirty pages
                  *    throttling so we could easily OOM just because too many
                  *    pages are in writeback and there is nothing else to
                  *    reclaim. Wait for the writeback to complete.
                  *
                  * In cases 1) and 2) we activate the pages to get them out of
                  * the way while we continue scanning for clean pages on the
                  * inactive list and refilling from the active list. The
                  * observation here is that waiting for disk writes is more
                  * expensive than potentially causing reloads down the line.
                  * Since they're marked for immediate reclaim, they won't put
                  * memory pressure on the cache working set any longer than it
                  * takes to write them to disk.
                  */
                 if (PageWriteback(page)) {
                         /* Case 1 above */
                         if (current_is_kswapd() &&
                             PageReclaim(page) &&
                             test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
                                 stat->nr_immediate++;
                                 goto activate_locked;
-
--[[linux-5.15/PageWriteback()]]
--[[linux-5.15/current_is_kswapd()]]
--[[linux-5.15/PageReclaim()]]
--[[linux-5.15/test_bit()]]
                         /* Case 2 above */
                         } else if (writeback_throttling_sane(sc) ||
                             !PageReclaim(page) || !may_enter_fs) {
                                 /*
                                  * This is slightly racy - end_page_writeback()
                                  * might have just cleared PageReclaim, then
                                  * setting PageReclaim here end up interpreted
                                  * as PageReadahead - but that does not matter
                                  * enough to care. What we do want is for this
                                  * page to have PageReclaim set next time memcg
                                  * reclaim reaches the tests above, so it will
                                  * then wait_on_page_writeback() to avoid OOM;
                                  * and it's also appropriate in global reclaim.
                                  */
                                 SetPageReclaim(page);
                                 stat->nr_writeback++;
                                 goto activate_locked;
-
--[[linux-5.15/writeback_throttling_sane()]]
--[[linux-5.15/PageReclaim()]]
--[[linux-5.15/SetPageReclaim()]]
                         /* Case 3 above */
                         } else {
                                 unlock_page(page);
                                 wait_on_page_writeback(page);
                                 /* then go back and try same page again */
                                 list_add_tail(&page->lru, page_list);
                                 continue;
                         }
                 }
-
--[[linux-5.15/unlock_page()]]
--[[linux-5.15/wait_on_page_writeback()]]
--[[linux-5.15/list_add_tail()]]
                 if (!ignore_references)
                         references = page_check_references(page, sc);
-
--[[linux-5.15/page_check_references()]]
                 switch (references) {
                 case PAGEREF_ACTIVATE:
                         goto activate_locked;
                 case PAGEREF_KEEP:
                         stat->nr_ref_keep += nr_pages;
                         goto keep_locked;
                 case PAGEREF_RECLAIM:
                 case PAGEREF_RECLAIM_CLEAN:
                         ; /* try to reclaim the page below */
                 }
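The three cases described in the long comment above can be hard to keep apart. The stand-alone snippet below re-expresses just that classification; writeback_case() and its parameters are invented for illustration and simply mirror the conditions tested in the source.

 /* Which of the three writeback cases applies? (illustration only) */
 #include <stdbool.h>
 #include <stdio.h>
 
 static int writeback_case(bool is_kswapd, bool page_reclaim,
                           bool node_writeback, bool throttling_sane,
                           bool may_enter_fs)
 {
         if (is_kswapd && page_reclaim && node_writeback)
                 return 1;       /* nr_immediate++, activate */
         if (throttling_sane || !page_reclaim || !may_enter_fs)
                 return 2;       /* SetPageReclaim(), nr_writeback++, activate */
         return 3;               /* legacy memcg: wait_on_page_writeback(), retry */
 }
 
 int main(void)
 {
         /* Legacy memcg reclaim that may enter fs and sees PageReclaim set: */
         printf("case %d\n", writeback_case(false, true, false, false, true));
         return 0;
 }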
                 /*
                  * Before reclaiming the page, try to relocate
                  * its contents to another node.
                  */
                 if (do_demote_pass &&
                     (thp_migration_supported() || !PageTransHuge(page))) {
                         list_add(&page->lru, &demote_pages);
                         unlock_page(page);
                         continue;
                 }
-
--[[linux-5.15/thp_migration_supported()]]
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/list_add()]]
--[[linux-5.15/unlock_page()]]
                 /*
                  * Anonymous process memory has backing store?
                  * Try to allocate it some swap space here.
                  * Lazyfree page could be freed directly
                  */
                 if (PageAnon(page) && PageSwapBacked(page)) {
                         if (!PageSwapCache(page)) {
                                 if (!(sc->gfp_mask & __GFP_IO))
                                         goto keep_locked;
                                 if (page_maybe_dma_pinned(page))
                                         goto keep_locked;
-
--[[linux-5.15/PageAnon()]]
--[[linux-5.15/PageSwapBacked()]]
--[[linux-5.15/PageSwapCache()]]
--[[linux-5.15/page_maybe_dma_pinned()]]
                                 if (PageTransHuge(page)) {
                                         /* cannot split THP, skip it */
                                         if (!can_split_huge_page(page, NULL))
                                                 goto activate_locked;
                                         /*
                                          * Split pages without a PMD map right
                                          * away. Chances are some or all of the
                                          * tail pages can be freed without IO.
                                          */
                                         if (!compound_mapcount(page) &&
                                             split_huge_page_to_list(page,
                                                                     page_list))
                                                 goto activate_locked;
                                 }
-
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/can_split_huge_page()]]
--[[linux-5.15/compound_mapcount()]]
--[[linux-5.15/split_huge_page_to_list()]]
                                 if (!add_to_swap(page)) {
                                         if (!PageTransHuge(page))
                                                 goto activate_locked_split;
                                         /* Fallback to swap normal pages */
                                         if (split_huge_page_to_list(page,
                                                                     page_list))
                                                 goto activate_locked;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                                         count_vm_event(THP_SWPOUT_FALLBACK);
 #endif
                                         if (!add_to_swap(page))
                                                 goto activate_locked_split;
                                 }
 
                                 may_enter_fs = true;
-
--[[linux-5.15/add_to_swap()]]
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/split_huge_page_to_list()]]
--[[linux-5.15/count_vm_event()]]
                                 /* Adding to swap updated mapping */
                                 mapping = page_mapping(page);
                         }
-
--[[linux-5.15/page_mapping()]]
                 } else if (unlikely(PageTransHuge(page))) {
                         /* Split file THP */
                         if (split_huge_page_to_list(page, page_list))
                                 goto keep_locked;
                 }
-
--[[linux-5.15/unlikely()]]
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/split_huge_page_to_list()]]
                 /*
                  * THP may get split above, need minus tail pages and update
                  * nr_pages to avoid accounting tail pages twice.
                  *
                  * The tail pages that are added into swap cache successfully
                  * reach here.
                  */
                 if ((nr_pages > 1) && !PageTransHuge(page)) {
                         sc->nr_scanned -= (nr_pages - 1);
                         nr_pages = 1;
                 }
 
                 /*
                  * The page is mapped into the page tables of one or more
                  * processes. Try to unmap it here.
                  */
                 if (page_mapped(page)) {
                         enum ttu_flags flags = TTU_BATCH_FLUSH;
                         bool was_swapbacked = PageSwapBacked(page);
 
                         if (unlikely(PageTransHuge(page)))
                                 flags |= TTU_SPLIT_HUGE_PMD;
 
                         try_to_unmap(page, flags);
                         if (page_mapped(page)) {
                                 stat->nr_unmap_fail += nr_pages;
                                 if (!was_swapbacked && PageSwapBacked(page))
                                         stat->nr_lazyfree_fail += nr_pages;
                                 goto activate_locked;
                         }
                 }
-
--[[linux-5.15/page_mapped()]]
--[[linux-5.15/ttu_flags]]
--[[linux-5.15/PageSwapBacked()]]
--[[linux-5.15/unlikely()]]
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/try_to_unmap()]]
--[[linux-5.15/page_mapped()]]
--[[linux-5.15/PageSwapBacked()]]
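To summarize the PageAnon() && PageSwapBacked() branch above: add_to_swap() is tried first; if it fails for a THP, the THP is split and add_to_swap() is retried on the base pages; a page that still cannot get swap space is activated. The stand-alone sketch below restates that fallback order with stub helpers invented for illustration (the early PMD-unmapped split and the DMA-pin check are omitted).

 /* add_to_swap() fallback order, condensed (illustration only). */
 #include <stdbool.h>
 #include <stdio.h>
 
 static bool add_to_swap_stub(bool swap_space_left) { return swap_space_left; }
 static bool split_ok_stub(bool can_split)          { return can_split; }
 
 static const char *swap_prep(bool is_thp, bool swap_space_left, bool can_split)
 {
         if (add_to_swap_stub(swap_space_left))
                 return "in swap cache";          /* fast path */
         if (!is_thp)
                 return "activate_locked_split";  /* base page: no fallback */
         if (!split_ok_stub(can_split))
                 return "activate_locked";        /* THP could not be split */
         /* THP_SWPOUT_FALLBACK: retry add_to_swap() on the base pages */
         return add_to_swap_stub(swap_space_left) ? "in swap cache"
                                                  : "activate_locked_split";
 }
 
 int main(void)
 {
         printf("THP, no swap space, splittable -> %s\n",
                swap_prep(true, false, true));
         return 0;
 }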
                 if (PageDirty(page)) {
                         /*
                          * Only kswapd can writeback filesystem pages
                          * to avoid risk of stack overflow. But avoid
                          * injecting inefficient single-page IO into
                          * flusher writeback as much as possible: only
                          * write pages when we've encountered many
                          * dirty pages, and when we've already scanned
                          * the rest of the LRU for clean pages and see
                          * the same dirty pages again (PageReclaim).
                          */
                         if (page_is_file_lru(page) &&
                             (!current_is_kswapd() || !PageReclaim(page) ||
                              !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
                                 /*
                                  * Immediately reclaim when written back.
                                  * Similar in principal to deactivate_page()
                                  * except we already have the page isolated
                                  * and know it's dirty
                                  */
                                 inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
                                 SetPageReclaim(page);
 
                                 goto activate_locked;
                         }
-
--[[linux-5.15/PageDirty()]]
--[[linux-5.15/page_is_file_lru()]]
--[[linux-5.15/current_is_kswapd()]]
--[[linux-5.15/PageReclaim()]]
--[[linux-5.15/test_bit()]]
--[[linux-5.15/inc_node_page_state()]]
--[[linux-5.15/SetPageReclaim()]]
                         if (references == PAGEREF_RECLAIM_CLEAN)
                                 goto keep_locked;
                         if (!may_enter_fs)
                                 goto keep_locked;
                         if (!sc->may_writepage)
                                 goto keep_locked;
 
                         /*
                          * Page is dirty. Flush the TLB if a writable entry
                          * potentially exists to avoid CPU writes after IO
                          * starts and then write it out here.
                          */
                         try_to_unmap_flush_dirty();
-
--[[linux-5.15/try_to_unmap_flush_dirty()]]
                         switch (pageout(page, mapping)) {
                         case PAGE_KEEP:
                                 goto keep_locked;
                         case PAGE_ACTIVATE:
                                 goto activate_locked;
                         case PAGE_SUCCESS:
                                 stat->nr_pageout += thp_nr_pages(page);
 
                                 if (PageWriteback(page))
                                         goto keep;
                                 if (PageDirty(page))
                                         goto keep;
 
                                 /*
                                  * A synchronous write - probably a ramdisk. Go
                                  * ahead and try to reclaim the page.
                                  */
                                 if (!trylock_page(page))
                                         goto keep;
                                 if (PageDirty(page) || PageWriteback(page))
                                         goto keep_locked;
                                 mapping = page_mapping(page);
                                 fallthrough;
-
--[[linux-5.15/thp_nr_pages()]]
--[[linux-5.15/PageWriteback()]]
--[[linux-5.15/PageDirty()]]
--[[linux-5.15/trylock_page()]]
--[[linux-5.15/PageDirty()]]
--[[linux-5.15/page_mapping()]]
                         case PAGE_CLEAN:
                                 ; /* try to free the page below */
                         }
                 }
 
                 /*
                  * If the page has buffers, try to free the buffer mappings
                  * associated with this page. If we succeed we try to free
                  * the page as well.
                  *
                  * We do this even if the page is PageDirty().
                  * try_to_release_page() does not perform I/O, but it is
                  * possible for a page to have PageDirty set, but it is actually
                  * clean (all its buffers are clean). This happens if the
                  * buffers were written out directly, with submit_bh(). ext3
                  * will do this, as well as the blockdev mapping.
                  * try_to_release_page() will discover that cleanness and will
                  * drop the buffers and mark the page clean - it can be freed.
                  *
                  * Rarely, pages can have buffers and no ->mapping. These are
                  * the pages which were not successfully invalidated in
                  * truncate_cleanup_page(). We try to drop those buffers here
                  * and if that worked, and the page is no longer mapped into
                  * process address space (page_count == 1) it can be freed.
                  * Otherwise, leave the page on the LRU so it is swappable.
                  */
                 if (page_has_private(page)) {
                         if (!try_to_release_page(page, sc->gfp_mask))
                                 goto activate_locked;
                         if (!mapping && page_count(page) == 1) {
                                 unlock_page(page);
                                 if (put_page_testzero(page))
                                         goto free_it;
                                 else {
                                         /*
                                          * rare race with speculative reference.
                                          * the speculative reference will free
                                          * this page shortly, so we may
                                          * increment nr_reclaimed here (and
                                          * leave it off the LRU).
                                          */
                                         nr_reclaimed++;
                                         continue;
                                 }
                         }
                 }
-
--[[linux-5.15/page_has_private()]]
--[[linux-5.15/try_to_release_page()]]
--[[linux-5.15/page_count()]]
--[[linux-5.15/unlock_page()]]
--[[linux-5.15/put_page_testzero()]]
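Whether the PageDirty() branch above actually reaches pageout() depends on several gates. The stand-alone predicate below restates them; will_call_pageout() and its parameters are invented for illustration.

 /* Gates in front of pageout() for a dirty page (illustration only). */
 #include <stdbool.h>
 #include <stdio.h>
 
 static bool will_call_pageout(bool file_lru, bool is_kswapd,
                               bool page_reclaim, bool node_dirty,
                               bool reclaim_clean, bool may_enter_fs,
                               bool may_writepage)
 {
         /* File pages are deferred to kswapd plus the PGDAT_DIRTY signal. */
         if (file_lru && (!is_kswapd || !page_reclaim || !node_dirty))
                 return false;   /* marked PageReclaim and activated instead */
         if (reclaim_clean || !may_enter_fs || !may_writepage)
                 return false;   /* kept locked on the LRU */
         return true;            /* try_to_unmap_flush_dirty() + pageout() */
 }
 
 int main(void)
 {
         /* Direct reclaim (not kswapd) never writes file pages itself: */
         printf("%d\n", will_call_pageout(true, false, true, true,
                                          false, true, true));
         return 0;
 }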
                 if (PageAnon(page) && !PageSwapBacked(page)) {
                         /* follow __remove_mapping for reference */
                         if (!page_ref_freeze(page, 1))
                                 goto keep_locked;
                         /*
                          * The page has only one reference left, which is
                          * from the isolation. After the caller puts the
                          * page back on lru and drops the reference, the
                          * page will be freed anyway. It doesn't matter
                          * which lru it goes. So we don't bother checking
                          * PageDirty here.
                          */
                         count_vm_event(PGLAZYFREED);
                         count_memcg_page_event(page, PGLAZYFREED);
                 } else if (!mapping || !__remove_mapping(mapping, page, true,
                                                          sc->target_mem_cgroup))
                         goto keep_locked;
 
                 unlock_page(page);
-
--[[linux-5.15/PageAnon()]]
--[[linux-5.15/PageSwapBacked()]]
--[[linux-5.15/page_ref_freeze()]]
--[[linux-5.15/count_vm_event()]]
--[[linux-5.15/count_memcg_page_event()]]
--[[linux-5.15/__remove_mapping()]]
--[[linux-5.15/unlock_page()]]
 free_it:
                 /*
                  * THP may get swapped out in a whole, need account
                  * all base pages.
                  */
                 nr_reclaimed += nr_pages;
 
                 /*
                  * Is there need to periodically free_page_list? It would
                  * appear not as the counts should be low
                  */
                 if (unlikely(PageTransHuge(page)))
                         destroy_compound_page(page);
                 else
                         list_add(&page->lru, &free_pages);
                 continue;
-
--[[linux-5.15/unlikely()]]
--[[linux-5.15/PageTransHuge()]]
--[[linux-5.15/destroy_compound_page()]]
--[[linux-5.15/list_add()]]
 activate_locked_split:
                 /*
                  * The tail pages that are failed to add into swap cache
                  * reach here. Fixup nr_scanned and nr_pages.
                  */
                 if (nr_pages > 1) {
                         sc->nr_scanned -= (nr_pages - 1);
                         nr_pages = 1;
                 }
 activate_locked:
                 /* Not a candidate for swapping, so reclaim swap space. */
                 if (PageSwapCache(page) && (mem_cgroup_swap_full(page) ||
                                                 PageMlocked(page)))
                         try_to_free_swap(page);
                 VM_BUG_ON_PAGE(PageActive(page), page);
                 if (!PageMlocked(page)) {
                         int type = page_is_file_lru(page);
                         SetPageActive(page);
                         stat->nr_activate[type] += nr_pages;
                         count_memcg_page_event(page, PGACTIVATE);
                 }
-
--[[linux-5.15/PageSwapCache()]]
--[[linux-5.15/mem_cgroup_swap_full()]]
--[[linux-5.15/PageMlocked()]]
--[[linux-5.15/try_to_free_swap()]]
--[[linux-5.15/VM_BUG_ON_PAGE()]]
--[[linux-5.15/PageActive()]]
--[[linux-5.15/PageMlocked()]]
--[[linux-5.15/page_is_file_lru()]]
--[[linux-5.15/SetPageActive()]]
--[[linux-5.15/count_memcg_page_event()]]
 keep_locked:
                 unlock_page(page);
 keep:
                 list_add(&page->lru, &ret_pages);
                 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
         }
         /* 'page_list' is always empty here */
-
--[[linux-5.15/unlock_page()]]
--[[linux-5.15/list_add()]]
--[[linux-5.15/VM_BUG_ON_PAGE()]]
--[[linux-5.15/PageLRU()]]
--[[linux-5.15/PageUnevictable()]]
         /* Migrate pages selected for demotion */
         nr_reclaimed += demote_page_list(&demote_pages, pgdat);
 
         /* Pages that could not be demoted are still in @demote_pages */
         if (!list_empty(&demote_pages)) {
                 /* Pages which failed to demoted go back on @page_list for retry: */
                 list_splice_init(&demote_pages, page_list);
                 do_demote_pass = false;
                 goto retry;
         }
 
         pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
-
--[[linux-5.15/demote_page_list()]]
--[[linux-5.15/list_empty()]]
--[[linux-5.15/list_splice_init()]]
         mem_cgroup_uncharge_list(&free_pages);
         try_to_unmap_flush();
         free_unref_page_list(&free_pages);
 
         list_splice(&ret_pages, page_list);
         count_vm_events(PGACTIVATE, pgactivate);
 
         return nr_reclaimed;
 }
-
--[[linux-5.15/mem_cgroup_uncharge_list()]]
--[[linux-5.15/try_to_unmap_flush()]]
--[[linux-5.15/free_unref_page_list()]]
--[[linux-5.15/list_splice()]]
--[[linux-5.15/count_vm_events()]]
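For context on how this function is driven: in linux-5.15 its callers are shrink_inactive_list(), which isolates pages from an inactive LRU and passes ignore_references == false, and reclaim_clean_pages_from_list(), which passes true. The contract is that every input page is either freed or handed back on page_list (the final list_splice() above). The stand-alone toy model below illustrates only that list contract; struct toy_page and toy_shrink() are invented and have nothing to do with the kernel's list_head API.

 /* Toy model of the "free it or hand it back" contract (illustration only). */
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 struct toy_page { bool reclaimable; struct toy_page *next; };
 
 /* Frees reclaimable pages, relinks the rest onto *list for the caller,
  * and returns the count, like shrink_page_list() returning nr_reclaimed. */
 static unsigned int toy_shrink(struct toy_page **list)
 {
         unsigned int nr_reclaimed = 0;
         struct toy_page *kept = NULL, *p, *next;
 
         for (p = *list; p; p = next) {
                 next = p->next;
                 if (p->reclaimable) {
                         free(p);                /* free_pages path */
                         nr_reclaimed++;
                 } else {
                         p->next = kept;         /* ret_pages path */
                         kept = p;
                 }
         }
         *list = kept;                           /* list_splice(&ret_pages, ...) */
         return nr_reclaimed;
 }
 
 int main(void)
 {
         struct toy_page *list = NULL;
 
         for (int i = 0; i < 5; i++) {
                 struct toy_page *p = malloc(sizeof(*p));
                 p->reclaimable = (i % 2 == 0);
                 p->next = list;
                 list = p;
         }
         printf("reclaimed %u of 5\n", toy_shrink(&list));
         while (list) {                          /* caller's putback; here just free */
                 struct toy_page *n = list->next;
                 free(list);
                 list = n;
         }
         return 0;
 }

In the kernel, surviving pages come back with PageActive set when they were activated, so the caller's putback restores them to the appropriate LRU list.

*Comments [#ea55e1e6]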