#author("2025-09-11T17:02:07+09:00","default:guest","guest") #author("2025-09-11T17:04:08+09:00","default:guest","guest") *参照元 [#fe988b01] #backlinks *説明 [#kd03c0f4] -パス: [[linux-5.15/mm/vmscan.c]] -FIXME: これは何? --説明 **引数 [#m608ea88] - -- -pg_data_t *pgdat --メモリノード。 --[[linux-5.15/pg_data_t]] -int order --オーダー。 -int highest_zoneidx --メモリを確保する最大のゾーンインデックス値。この値以下のゾーンがチェック対象です。 **返り値 [#j1c0272a] - -int -- **参考 [#i2d498e3] *実装 [#v3b2814b] /* * For kswapd, balance_pgdat() will reclaim pages across a node from zones * that are eligible for use by the caller until at least one zone is * balanced. * * Returns the order kswapd finished reclaiming at. * * kswapd scans the zones in the highmem->normal->dma direction. It skips * zones which have free_pages > high_wmark_pages(zone), but once a zone is * found to have free_pages <= high_wmark_pages(zone), any page in that zone * or lower is eligible for reclaim until at least one usable zone is * balanced. */ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) { int i; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; unsigned long pflags; unsigned long nr_boost_reclaim; unsigned long zone_boosts[MAX_NR_ZONES] = { 0, }; bool boosted; struct zone *zone; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .order = order, .may_unmap = 1, }; set_task_reclaim_state(current, &sc.reclaim_state); psi_memstall_enter(&pflags); __fs_reclaim_acquire(_THIS_IP_); count_vm_event(PAGEOUTRUN); - --[[linux-5.15/zone]] --[[linux-5.15/scan_control]] --[[linux-5.15/set_task_reclaim_state()]] --[[linux-5.15/psi_memstall_enter()]] --[[linux-5.15/__fs_reclaim_acquire()]] --[[linux-5.15/count_vm_event()]] /* * Account for the reclaim boost. Note that the zone boost is left in * place so that parallel allocations that are near the watermark will * stall or direct reclaim until kswapd is finished. */ nr_boost_reclaim = 0; for (i = 0; i <= highest_zoneidx; i++) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; nr_boost_reclaim += zone->watermark_boost; zone_boosts[i] = zone->watermark_boost; } boosted = nr_boost_reclaim; - --[[linux-5.15/managed_zone()]] restart: set_reclaim_active(pgdat, highest_zoneidx); sc.priority = DEF_PRIORITY; - --[[linux-5.15/set_reclaim_active()]] do { unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; bool balanced; bool ret; sc.reclaim_idx = highest_zoneidx; /* * If the number of buffer_heads exceeds the maximum allowed * then consider reclaiming from all zones. This has a dual * purpose -- on 64-bit systems it is expected that * buffer_heads are stripped during active rotation. On 32-bit * systems, highmem pages can pin lowmem memory and shrinking * buffers can relieve lowmem pressure. Reclaim may still not * go ahead if all eligible zones for the original allocation * request are balanced to avoid excessive reclaim from kswapd. */ if (buffer_heads_over_limit) { for (i = MAX_NR_ZONES - 1; i >= 0; i--) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; sc.reclaim_idx = i; break; } } -buffer_heads_over_limitが有効ならhightest_zoneidxを無視して、全てのゾーンからメモリを確保できるものとします。 --[[linux-5.15/managed_zone()]] /* * If the pgdat is imbalanced then ignore boosting and preserve * the watermarks for a later time and restart. Note that the * zone watermarks will be still reset at the end of balancing * on the grounds that the normal reclaim should be enough to * re-evaluate if boosting is required when kswapd next wakes. 
                 /*
                  * If the pgdat is imbalanced then ignore boosting and preserve
                  * the watermarks for a later time and restart. Note that the
                  * zone watermarks will be still reset at the end of balancing
                  * on the grounds that the normal reclaim should be enough to
                  * re-evaluate if boosting is required when kswapd next wakes.
                  */
                 balanced = pgdat_balanced(pgdat, sc.order, highest_zoneidx);
                 if (!balanced && nr_boost_reclaim) {
                         nr_boost_reclaim = 0;
                         goto restart;
                 }
-If the node is not balanced while a boost is pending, drop the boost (nr_boost_reclaim = 0) and restart so that reclaim targets the normal watermarks.
--[[linux-5.15/pgdat_balanced()]]
                 /*
                  * If boosting is not active then only reclaim if there are no
                  * eligible zones. Note that sc.reclaim_idx is not used as
                  * buffer_heads_over_limit may have adjusted it.
                  */
                 if (!nr_boost_reclaim && balanced)
                         goto out;
-If no boost is pending and the node is already balanced, there is nothing left to do, so exit.
                 /* Limit the priority of boosting to avoid reclaim writeback */
                 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
                         raise_priority = false;
 
                 /*
                  * Do not writeback or swap pages for boosted reclaim. The
                  * intent is to relieve pressure not issue sub-optimal IO
                  * from reclaim context. If no pages are reclaimed, the
                  * reclaim will be aborted.
                  */
                 sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
                 sc.may_swap = !nr_boost_reclaim;
 
                 /*
                  * Do some background aging of the anon list, to give
                  * pages a chance to be referenced before reclaiming. All
                  * pages are rotated regardless of classzone as this is
                  * about consistent aging.
                  */
                 age_active_anon(pgdat, &sc);
-Age the anonymous LRU lists so pages get a chance to be referenced before they are reclaimed.
--[[linux-5.15/age_active_anon()]]
                 /*
                  * If we're getting trouble reclaiming, start doing writepage
                  * even in laptop mode.
                  */
                 if (sc.priority < DEF_PRIORITY - 2)
                         sc.may_writepage = 1;
 
                 /* Call soft limit reclaim before calling shrink_node. */
                 sc.nr_scanned = 0;
                 nr_soft_scanned = 0;
                 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
                                                 sc.gfp_mask, &nr_soft_scanned);
                 sc.nr_reclaimed += nr_soft_reclaimed;
-Run memcg soft limit reclaim first and credit whatever it reclaimed.
--[[linux-5.15/mem_cgroup_soft_limit_reclaim()]]
                 /*
                  * There should be no need to raise the scanning priority if
                  * enough pages are already being scanned that that high
                  * watermark would be met at 100% efficiency.
                  */
                 if (kswapd_shrink_node(pgdat, &sc))
                         raise_priority = false;
-Reclaim from the node. If enough pages were already scanned, keep the current priority.
--[[linux-5.15/kswapd_shrink_node()]]
                 /*
                  * If the low watermark is met there is no need for processes
                  * to be throttled on pfmemalloc_wait as they should not be
                  * able to safely make forward progress. Wake them
                  */
                 if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
                                 allow_direct_reclaim(pgdat))
                         wake_up_all(&pgdat->pfmemalloc_wait);
-Once direct reclaim can make progress again, wake any tasks throttled on pfmemalloc_wait.
--[[linux-5.15/waitqueue_active()]]
--[[linux-5.15/allow_direct_reclaim()]]
--[[linux-5.15/wake_up_all()]]
                 /* Check if kswapd should be suspending */
                 __fs_reclaim_release(_THIS_IP_);
                 ret = try_to_freeze();
                 __fs_reclaim_acquire(_THIS_IP_);
                 if (ret || kthread_should_stop())
                         break;
-Drop the fs_reclaim lock across the freeze point, and stop balancing if kswapd was frozen or asked to stop.
--[[linux-5.15/__fs_reclaim_release()]]
--[[linux-5.15/try_to_freeze()]]
--[[linux-5.15/kthread_should_stop()]]
                 /*
                  * Raise priority if scanning rate is too low or there was no
                  * progress in reclaiming pages
                  */
                 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
                 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed);
 
                 /*
                  * If reclaim made no progress for a boost, stop reclaim as
                  * IO cannot be queued and it could be an infinite loop in
                  * extreme circumstances.
                  */
                 if (nr_boost_reclaim && !nr_reclaimed)
                         break;
 
                 if (raise_priority || !nr_reclaimed)
                         sc.priority--;
         } while (sc.priority >= 1);
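Each pass around the do-while above either reclaims at the current priority or lowers sc.priority by one, and each drop roughly doubles the scan pressure: in linux-5.15, get_scan_count() derives its per-LRU scan target as about lru_size >> sc->priority. The sketch below is a standalone illustration of that relationship; the LRU size is an arbitrary example value, not taken from the source.
 /* Standalone sketch (not kernel code): how falling sc.priority raises
  * scan pressure. Each priority drop doubles the pages considered. */
 #include <stdio.h>
 
 #define DEF_PRIORITY 12        /* initial scan priority, as in the kernel */
 
 int main(void)
 {
         unsigned long lru_size = 1UL << 20; /* example: 1M pages on an LRU */
 
         /* balance_pgdat() walks from DEF_PRIORITY down to 1, lowering the
          * priority whenever a pass makes no progress (raise_priority). */
         for (int priority = DEF_PRIORITY; priority >= 1; priority--)
                 printf("priority %2d -> scan target %8lu pages\n",
                        priority, lru_size >> priority);
         return 0;
 }
At DEF_PRIORITY only 1/4096 of the LRU is considered per pass; by priority 1 half of it is, which is why a pass that makes no progress lowers the priority.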
         if (!sc.nr_reclaimed)
                 pgdat->kswapd_failures++;
 
 out:
         clear_reclaim_active(pgdat, highest_zoneidx);
-If nothing was reclaimed, count a kswapd failure; in either case mark reclaim as no longer active on this node.
--[[linux-5.15/clear_reclaim_active()]]
         /* If reclaim was boosted, account for the reclaim done in this pass */
         if (boosted) {
                 unsigned long flags;
 
                 for (i = 0; i <= highest_zoneidx; i++) {
                         if (!zone_boosts[i])
                                 continue;
 
                         /* Increments are under the zone lock */
                         zone = pgdat->node_zones + i;
                         spin_lock_irqsave(&zone->lock, flags);
                         zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
                         spin_unlock_irqrestore(&zone->lock, flags);
                 }
 
                 /*
                  * As there is now likely space, wakeup kcompact to defragment
                  * pageblocks.
                  */
                 wakeup_kcompactd(pgdat, pageblock_order, highest_zoneidx);
         }
-Under the zone lock, subtract the boost this pass started with from each zone's watermark_boost, then wake kcompactd to defragment pageblocks.
--[[linux-5.15/spin_lock_irqsave()]]
--[[linux-5.15/spin_unlock_irqrestore()]]
--[[linux-5.15/wakeup_kcompactd()]]
         snapshot_refaults(NULL, pgdat);
         __fs_reclaim_release(_THIS_IP_);
         psi_memstall_leave(&pflags);
         set_task_reclaim_state(current, NULL);
-Snapshot the refault counters, release the fs_reclaim lock, leave PSI memstall accounting, and detach the reclaim state from the current task.
--[[linux-5.15/snapshot_refaults()]]
--[[linux-5.15/__fs_reclaim_release()]]
--[[linux-5.15/psi_memstall_leave()]]
--[[linux-5.15/set_task_reclaim_state()]]
         /*
          * Return the order kswapd stopped reclaiming at as
          * prepare_kswapd_sleep() takes it into account. If another caller
          * entered the allocator slow path while kswapd was awake, order will
          * remain at the higher level.
          */
         return sc.order;
 }

*Comments [#m7833ae5]