#author("2025-09-11T21:19:57+09:00","default:guest","guest") #author("2025-09-11T21:20:21+09:00","default:guest","guest") *参照元 [#c0c8c79f] #backlinks *説明 [#c7f126e9] -パス: [[linux-5.15/mm/vmscan.c]] -FIXME: これは何? --説明 **引数 [#afe419ce] -pg_data_t *pgdat --メモリノード。 --[[linux-5.15/pg_data_t]] -struct scan_control *sc -- --[[linux-5.15/scan_control]] **返り値 [#z680e3db] -なし **参考 [#y859b7a5] *実装 [#fd676bbb] static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) { struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_reclaimed, nr_scanned; struct lruvec *target_lruvec; bool reclaimable = false; unsigned long file; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); - --[[linux-5.15/reclaim_state]] --[[linux-5.15/lruvec]] --[[linux-5.15/mem_cgroup_lruvec()]] again: /* * Flush the memory cgroup stats, so that we read accurate per-memcg * lruvec stats for heuristics. */ mem_cgroup_flush_stats(); memset(&sc->nr, 0, sizeof(sc->nr)); nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - --[[linux-5.15/mem_cgroup_flush_stats()]] /* * Determine the scan balance between anon and file LRUs. */ spin_lock_irq(&target_lruvec->lru_lock); sc->anon_cost = target_lruvec->anon_cost; sc->file_cost = target_lruvec->file_cost; spin_unlock_irq(&target_lruvec->lru_lock); - --[[linux-5.15/spin_lock_irq()]] --[[linux-5.15/spin_unlock_irq()]] /* * Target desirable inactive:active list ratios for the anon * and file LRU lists. */ if (!sc->force_deactivate) { -強制的にactive系もページ回収対象にするフラグがない。初期値はfalseなので基本的にはこのブロックにくるはず。 unsigned long refaults; refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); if (refaults != target_lruvec->refaults[0] || inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) sc->may_deactivate |= DEACTIVATE_ANON; else sc->may_deactivate &= ~DEACTIVATE_ANON; - --[[linux-5.15/lruvec_page_state()]] --[[linux-5.15/inactive_is_low()]] /* * When refaults are being observed, it means a new * workingset is being established. Deactivate to get * rid of any stale active pages quickly. */ refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE); if (refaults != target_lruvec->refaults[1] || inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) sc->may_deactivate |= DEACTIVATE_FILE; else sc->may_deactivate &= ~DEACTIVATE_FILE; - --[[linux-5.15/lruvec_page_state()]] --[[linux-5.15/inactive_is_low()]] } else sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; -強制的にactive系もページ回収対象にするフラグがある。ページ回収でactive系をinactive系に移動させ、ページ回収対象にする。 /* * If we have plenty of inactive file pages that aren't * thrashing, try to reclaim those first before touching * anonymous pages. */ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) sc->cache_trim_mode = 1; else sc->cache_trim_mode = 0; - --[[linux-5.15/lruvec_page_state()]] /* * Prevent the reclaimer from falling into the cache trap: as * cache pages start out inactive, every cache fault will tip * the scan balance towards the file LRU. And as the file LRU * shrinks, so does the window for rotation from references. * This means we have a runaway feedback loop where a tiny * thrashing file LRU becomes infinitely more attractive than * anon pages. Try to detect this based on file LRU size. 
         /*
          * Prevent the reclaimer from falling into the cache trap: as
          * cache pages start out inactive, every cache fault will tip
          * the scan balance towards the file LRU.  And as the file LRU
          * shrinks, so does the window for rotation from references.
          * This means we have a runaway feedback loop where a tiny
          * thrashing file LRU becomes infinitely more attractive than
          * anon pages.  Try to detect this based on file LRU size.
          */
         if (!cgroup_reclaim(sc)) {
                 unsigned long total_high_wmark = 0;
                 unsigned long free, anon;
                 int z;
-
--[[linux-5.15/cgroup_reclaim()]]

                 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
                 file = node_page_state(pgdat, NR_ACTIVE_FILE) +
                            node_page_state(pgdat, NR_INACTIVE_FILE);
 
                 for (z = 0; z < MAX_NR_ZONES; z++) {
                         struct zone *zone = &pgdat->node_zones[z];
                         if (!managed_zone(zone))
                                 continue;
 
                         total_high_wmark += high_wmark_pages(zone);
                 }
-
--[[linux-5.15/sum_zone_node_page_state()]]
--[[linux-5.15/node_page_state()]]
--[[linux-5.15/managed_zone()]]
--[[linux-5.15/high_wmark_pages()]]

                 /*
                  * Consider anon: if that's low too, this isn't a
                  * runaway file reclaim problem, but rather just
                  * extreme pressure. Reclaim as per usual then.
                  */
                 anon = node_page_state(pgdat, NR_INACTIVE_ANON);
 
                 sc->file_is_tiny =
                         file + free <= total_high_wmark &&
                         !(sc->may_deactivate & DEACTIVATE_ANON) &&
                         anon >> sc->priority;
-
--[[linux-5.15/node_page_state()]]
         }
 
         shrink_node_memcgs(pgdat, sc);
-
--[[linux-5.15/shrink_node_memcgs()]]

         if (reclaim_state) {
                 sc->nr_reclaimed += reclaim_state->reclaimed_slab;
                 reclaim_state->reclaimed_slab = 0;
         }
 
         /* Record the subtree's reclaim efficiency */
         vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
                    sc->nr_scanned - nr_scanned,
                    sc->nr_reclaimed - nr_reclaimed);
 
         if (sc->nr_reclaimed - nr_reclaimed)
                 reclaimable = true;
-
--[[linux-5.15/vmpressure()]]

         if (current_is_kswapd()) {
                 /*
                  * If reclaim is isolating dirty pages under writeback,
                  * it implies that the long-lived page allocation rate
                  * is exceeding the page laundering rate. Either the
                  * global limits are not being effective at throttling
                  * processes due to the page distribution throughout
                  * zones or there is heavy usage of a slow backing
                  * device. The only option is to throttle from reclaim
                  * context which is not ideal as there is no guarantee
                  * the dirtying process is throttled in the same way
                  * balance_dirty_pages() manages.
                  *
                  * Once a node is flagged PGDAT_WRITEBACK, kswapd will
                  * count the number of pages under pages flagged for
                  * immediate reclaim and stall if any are encountered
                  * in the nr_immediate check below.
                  */
                 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
                         set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
--[[linux-5.15/current_is_kswapd()]]
--[[linux-5.15/set_bit()]]

                 /* Allow kswapd to start writing pages during reclaim.*/
                 if (sc->nr.unqueued_dirty == sc->nr.file_taken)
                         set_bit(PGDAT_DIRTY, &pgdat->flags);
 
                 /*
                  * If kswapd scans pages marked for immediate
                  * reclaim and under writeback (nr_immediate), it
                  * implies that pages are cycling through the LRU
                  * faster than they are written so also forcibly stall.
                  */
                 if (sc->nr.immediate)
                         congestion_wait(BLK_RW_ASYNC, HZ/10);
-
--[[linux-5.15/congestion_wait()]]
         }
 
         /*
          * Tag a node/memcg as congested if all the dirty pages
          * scanned were backed by a congested BDI and
          * wait_iff_congested will stall.
          *
          * Legacy memcg will stall in page writeback so avoid forcibly
          * stalling in wait_iff_congested().
          */
         if ((current_is_kswapd() ||
              (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
             sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
                 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
-
--[[linux-5.15/current_is_kswapd()]]
--[[linux-5.15/cgroup_reclaim()]]
--[[linux-5.15/writeback_throttling_sane()]]
--[[linux-5.15/set_bit()]]
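-Note: the sc->nr counters accumulated while shrink_node_memcgs() runs drive the throttling decisions above: PGDAT_WRITEBACK when every taken page was under writeback, PGDAT_DIRTY when all taken file pages were unqueued dirty, a short stall when pages flagged for immediate reclaim were seen, and LRUVEC_CONGESTED when every dirty page scanned sat on a congested BDI. The sketch below is a minimal userspace model of those comparisons, assuming invented names (struct nr_stats, report()); it is not kernel code.
 #include <stdbool.h>
 #include <stdio.h>
 
 /* Invented stand-in for the counters kept in sc->nr during one pass. */
 struct nr_stats {
         unsigned long dirty, unqueued_dirty, congested, writeback,
                       immediate, taken, file_taken;
 };
 
 /* Prints which of the throttling decisions above would trigger. */
 static void report(const struct nr_stats *nr, bool is_kswapd, bool sane_cgroup_reclaim)
 {
         if (is_kswapd) {
                 if (nr->writeback && nr->writeback == nr->taken)
                         puts("set PGDAT_WRITEBACK: all taken pages were under writeback");
                 if (nr->unqueued_dirty == nr->file_taken)
                         puts("set PGDAT_DIRTY: kswapd may start writing pages");
                 if (nr->immediate)
                         puts("stall ~HZ/10: pages cycle through the LRU faster than writeback");
         }
         if ((is_kswapd || sane_cgroup_reclaim) &&
             nr->dirty && nr->dirty == nr->congested)
                 puts("set LRUVEC_CONGESTED: every dirty page was on a congested BDI");
 }
 
 int main(void)
 {
         struct nr_stats nr = {
                 .dirty = 64, .congested = 64,
                 .writeback = 32, .taken = 32,
                 .unqueued_dirty = 8, .file_taken = 16,
                 .immediate = 4,
         };
         report(&nr, true, false);
         return 0;
 }
-When LRUVEC_CONGESTED ends up set, the code that follows stalls direct reclaim (but not kswapd) in wait_iff_congested().
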
         /*
          * Stall direct reclaim for IO completions if underlying BDIs
          * and node is congested. Allow kswapd to continue until it
          * starts encountering unqueued dirty pages or cycling through
          * the LRU too quickly.
          */
         if (!current_is_kswapd() && current_may_throttle() &&
             !sc->hibernation_mode &&
             test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
                 wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
         if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
                                     sc))
                 goto again;
-
--[[linux-5.15/current_is_kswapd()]]
--[[linux-5.15/current_may_throttle()]]
--[[linux-5.15/test_bit()]]
--[[linux-5.15/wait_iff_congested()]]
--[[linux-5.15/should_continue_reclaim()]]

         /*
          * Kswapd gives up on balancing particular nodes after too
          * many failures to reclaim anything from them and goes to
          * sleep. On reclaim progress, reset the failure counter. A
          * successful direct reclaim run will revive a dormant kswapd.
          */
         if (reclaimable)
                 pgdat->kswapd_failures = 0;
 }
-
--[[linux-5.15/]]

*Comments [#nf542afc]