linux-5.15/balance_pgdat() の変更点

追加された行はこの色です。
削除された行はこの色です。
linux-5.15/balance_pgdat() へ行く。
linux-5.15/balance_pgdat() の差分を削除
#author("2025-09-11T17:02:07+09:00","default:guest","guest")
#author("2025-09-11T17:04:08+09:00","default:guest","guest")
*参照元 [#fe988b01]
#backlinks

*説明 [#kd03c0f4]
-パス: [[linux-5.15/mm/vmscan.c]]

-FIXME: これは何？
--説明


**引数 [#m608ea88]
-
--
-pg_data_t *pgdat
--メモリノード。
--[[linux-5.15/pg_data_t]]
-int order
--オーダー。
-int highest_zoneidx
--メモリを確保する最大のゾーンインデックス値。この値以下のゾーンがチェック対象です。


**返り値 [#j1c0272a]
-
-int
--

**参考 [#i2d498e3]


*実装 [#v3b2814b]

 /*
  * For kswapd, balance_pgdat() will reclaim pages across a node from zones
  * that are eligible for use by the caller until at least one zone is
  * balanced.
  *
  * Returns the order kswapd finished reclaiming at.
  *
  * kswapd scans the zones in the highmem->normal->dma direction.  It skips
  * zones which have free_pages > high_wmark_pages(zone), but once a zone is
  * found to have free_pages <= high_wmark_pages(zone), any page in that zone
  * or lower is eligible for reclaim until at least one usable zone is
  * balanced.
  */
 static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 {
 	int i;
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
 	unsigned long pflags;
 	unsigned long nr_boost_reclaim;
 	unsigned long zone_boosts[MAX_NR_ZONES] = { 0, };
 	bool boosted;
 	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.order = order,
 		.may_unmap = 1,
 	};
 
 	set_task_reclaim_state(current, &sc.reclaim_state);
 	psi_memstall_enter(&pflags);
 	__fs_reclaim_acquire(_THIS_IP_);
 
 	count_vm_event(PAGEOUTRUN);
 
-
--[[linux-5.15/zone]]
--[[linux-5.15/scan_control]]
--[[linux-5.15/set_task_reclaim_state()]]
--[[linux-5.15/psi_memstall_enter()]]
--[[linux-5.15/__fs_reclaim_acquire()]]
--[[linux-5.15/count_vm_event()]]

 	/*
 	 * Account for the reclaim boost. Note that the zone boost is left in
 	 * place so that parallel allocations that are near the watermark will
 	 * stall or direct reclaim until kswapd is finished.
 	 */
 	nr_boost_reclaim = 0;
 	for (i = 0; i <= highest_zoneidx; i++) {
 		zone = pgdat->node_zones + i;
 		if (!managed_zone(zone))
 			continue;
 
 		nr_boost_reclaim += zone->watermark_boost;
 		zone_boosts[i] = zone->watermark_boost;
 	}
 	boosted = nr_boost_reclaim;
 
-
--[[linux-5.15/managed_zone()]]

 restart:
 	set_reclaim_active(pgdat, highest_zoneidx);
 	sc.priority = DEF_PRIORITY;
-
--[[linux-5.15/set_reclaim_active()]]

 	do {
 		unsigned long nr_reclaimed = sc.nr_reclaimed;
 		bool raise_priority = true;
 		bool balanced;
 		bool ret;
 
 		sc.reclaim_idx = highest_zoneidx;
 
 		/*
 		 * If the number of buffer_heads exceeds the maximum allowed
 		 * then consider reclaiming from all zones. This has a dual
 		 * purpose -- on 64-bit systems it is expected that
 		 * buffer_heads are stripped during active rotation. On 32-bit
 		 * systems, highmem pages can pin lowmem memory and shrinking
 		 * buffers can relieve lowmem pressure. Reclaim may still not
 		 * go ahead if all eligible zones for the original allocation
 		 * request are balanced to avoid excessive reclaim from kswapd.
 		 */
 		if (buffer_heads_over_limit) {
 			for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
 				zone = pgdat->node_zones + i;
 				if (!managed_zone(zone))
 					continue;
 
 				sc.reclaim_idx = i;
 				break;
 			}
 		}
 
-buffer_heads_over_limitが有効ならhightest_zoneidxを無視して、全てのゾーンからメモリを確保できるものとします。
--[[linux-5.15/managed_zone()]]

 		/*
 		 * If the pgdat is imbalanced then ignore boosting and preserve
 		 * the watermarks for a later time and restart. Note that the
 		 * zone watermarks will be still reset at the end of balancing
 		 * on the grounds that the normal reclaim should be enough to
 		 * re-evaluate if boosting is required when kswapd next wakes.
 		 */
 		balanced = pgdat_balanced(pgdat, sc.order, highest_zoneidx);
 		if (!balanced && nr_boost_reclaim) {
 			nr_boost_reclaim = 0;
 			goto restart;
 		}
 
-メモリノードの空きページのバランスが取れていれば、boost値を0にして再実行します。
--[[linux-5.15/pgdat_balanced()]]

 		/*
 		 * If boosting is not active then only reclaim if there are no
 		 * eligible zones. Note that sc.reclaim_idx is not used as
 		 * buffer_heads_over_limit may have adjusted it.
 		 */
 		if (!nr_boost_reclaim && balanced)
 			goto out;
 
-boost値が0でなく、メモリノードの空きページのバランスが取れていれば終了します。

 		/* Limit the priority of boosting to avoid reclaim writeback */
 		if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
 			raise_priority = false;
 
 		/*
 		 * Do not writeback or swap pages for boosted reclaim. The
 		 * intent is to relieve pressure not issue sub-optimal IO
 		 * from reclaim context. If no pages are reclaimed, the
 		 * reclaim will be aborted.
 		 */
 		sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
 		sc.may_swap = !nr_boost_reclaim;
 
 		/*
 		 * Do some background aging of the anon list, to give
 		 * pages a chance to be referenced before reclaiming. All
 		 * pages are rotated regardless of classzone as this is
 		 * about consistent aging.
 		 */
 		age_active_anon(pgdat, &sc);
 
-
--[[linux-5.15/age_active_anon()]]

 		/*
 		 * If we're getting trouble reclaiming, start doing writepage
 		 * even in laptop mode.
 		 */
 		if (sc.priority < DEF_PRIORITY - 2)
 			sc.may_writepage = 1;
 
 		/* Call soft limit reclaim before calling shrink_node. */
 		sc.nr_scanned = 0;
 		nr_soft_scanned = 0;
 		nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
 						sc.gfp_mask, &nr_soft_scanned);
 		sc.nr_reclaimed += nr_soft_reclaimed;
 
-
--[[linux-5.15/mem_cgroup_soft_limit_reclaim()]]

 		/*
 		 * There should be no need to raise the scanning priority if
 		 * enough pages are already being scanned that that high
 		 * watermark would be met at 100% efficiency.
 		 */
 		if (kswapd_shrink_node(pgdat, &sc))
 			raise_priority = false;
 
-
--[[linux-5.15/kswapd_shrink_node()]]

 		/*
 		 * If the low watermark is met there is no need for processes
 		 * to be throttled on pfmemalloc_wait as they should not be
 		 * able to safely make forward progress. Wake them
 		 */
 		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
 				allow_direct_reclaim(pgdat))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
-
--[[linux-5.15/waitqueue_active()]]
--[[linux-5.15/allow_direct_reclaim()]]
--[[linux-5.15/wake_up_all()]]

 		/* Check if kswapd should be suspending */
 		__fs_reclaim_release(_THIS_IP_);
 		ret = try_to_freeze();
 		__fs_reclaim_acquire(_THIS_IP_);
 		if (ret || kthread_should_stop())
 			break;
 
-
--[[linux-5.15/__fs_reclaim_release()]]
--[[linux-5.15/try_to_freeze()]]
--[[linux-5.15/kthread_should_stop()]]

 		/*
 		 * Raise priority if scanning rate is too low or there was no
 		 * progress in reclaiming pages
 		 */
 		nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
 		nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed);
 
 		/*
 		 * If reclaim made no progress for a boost, stop reclaim as
 		 * IO cannot be queued and it could be an infinite loop in
 		 * extreme circumstances.
 		 */
 		if (nr_boost_reclaim && !nr_reclaimed)
 			break;
 
 		if (raise_priority || !nr_reclaimed)
 			sc.priority--;
 	} while (sc.priority >= 1);
 
 	if (!sc.nr_reclaimed)
 		pgdat->kswapd_failures++;
 
 out:
 	clear_reclaim_active(pgdat, highest_zoneidx);
 
-
--[[linux-5.15/clear_reclaim_active()]]

 	/* If reclaim was boosted, account for the reclaim done in this pass */
 	if (boosted) {
 		unsigned long flags;
 
 		for (i = 0; i <= highest_zoneidx; i++) {
 			if (!zone_boosts[i])
 				continue;
 
 			/* Increments are under the zone lock */
 			zone = pgdat->node_zones + i;
 			spin_lock_irqsave(&zone->lock, flags);
 			zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
 			spin_unlock_irqrestore(&zone->lock, flags);
 		}
 
 		/*
 		 * As there is now likely space, wakeup kcompact to defragment
 		 * pageblocks.
 		 */
 		wakeup_kcompactd(pgdat, pageblock_order, highest_zoneidx);
 	}
 
-
--[[linux-5.15/spin_lock_irqsave()]]
--[[linux-5.15/spin_unlock_irqsave()]]
--[[linux-5.15/wakeup_kcompactd()]]

 	snapshot_refaults(NULL, pgdat);
 	__fs_reclaim_release(_THIS_IP_);
 	psi_memstall_leave(&pflags);
 	set_task_reclaim_state(current, NULL);
 
-
--[[linux-5.15/snapshot_refaults()]]
--[[linux-5.15/__fs_reclaim_release()]]
--[[linux-5.15/psi_memstall_leave()]]
--[[linux-5.15/set_task_reclaim_state()]]

 	/*
 	 * Return the order kswapd stopped reclaiming at as
 	 * prepare_kswapd_sleep() takes it into account. If another caller
 	 * entered the allocator slow path while kswapd was awake, order will
 	 * remain at the higher level.
 	 */
 	return sc.order;
 }


*コメント [#m7833ae5]