参照元†
- struct lruvec *lruvec
- struct scan_control *sc
- unsigned long *nr
返り値†
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
* by looking at the fraction of the pages scanned we did rotate back
* onto the active list instead of evict.
*
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
unsigned long *nr)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
unsigned long anon_cost, file_cost, total_cost;
int swappiness = mem_cgroup_swappiness(memcg);
u64 fraction[ANON_AND_FILE];
u64 denominator = 0; /* gcc */
enum scan_balance scan_balance;
unsigned long ap, fp;
enum lru_list lru;
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
scan_balance = SCAN_FILE;
goto out;
}
/*
* Global reclaim will swap to prevent OOM even with no
* swappiness, but memcg users want to use this knob to
* disable swapping for individual groups completely when
* using the memory controller's swap limit feature would be
* too expensive.
*/
if (cgroup_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
/*
* Do not apply any pressure balancing cleverness when the
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
/*
* If the system is almost out of file pages, force-scan anon.
*/
if (sc->file_is_tiny) {
scan_balance = SCAN_ANON;
goto out;
}
/*
* If there is enough inactive page cache, we do not reclaim
* anything from the anonymous working right now.
*/
if (sc->cache_trim_mode) {
scan_balance = SCAN_FILE;
goto out;
}
scan_balance = SCAN_FRACT;
/*
* Calculate the pressure balance between anon and file pages.
*
* The amount of pressure we put on each LRU is inversely
* proportional to the cost of reclaiming each list, as
* determined by the share of pages that are refaulting, times
* the relative IO cost of bringing back a swapped out
* anonymous page vs reloading a filesystem page (swappiness).
*
* Although we limit that influence to ensure no list gets
* left behind completely: at least a third of the pressure is
* applied, before swappiness.
*
* With swappiness at 100, anon and file have equal IO cost.
*/
total_cost = sc->anon_cost + sc->file_cost;
anon_cost = total_cost + sc->anon_cost;
file_cost = total_cost + sc->file_cost;
total_cost = anon_cost + file_cost;
ap = swappiness * (total_cost + 1);
ap /= anon_cost + 1;
fp = (200 - swappiness) * (total_cost + 1);
fp /= file_cost + 1;
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp;
out:
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long low, min;
unsigned long scan;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
mem_cgroup_protection(sc->target_mem_cgroup, memcg,
&min, &low);
if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
* setting.
*
* This is important, as otherwise scanning aggression
* becomes extremely binary -- from nothing as we
* approach the memory protection threshold, to totally
* nominal as we exceed it. This results in requiring
* setting extremely liberal protection thresholds. It
* also means we simply get no protection at all if we
* set it too low, which is not ideal.
*
* If there is any protection in place, we reduce scan
* pressure by how much of the total memory used is
* within protection thresholds.
*
* There is one special case: in the first reclaim pass,
* we skip over all groups that are within their low
* protection. If that fails to reclaim enough pages to
* satisfy the reclaim goal, we come back and override
* the best-effort low protection. However, we still
* ideally want to honor how well-behaved groups are in
* that case instead of simply punishing them all
* equally. As such, we reclaim them based on how much
* memory they are using, reducing the scan pressure
* again by how much of the total memory used is under
* hard protection.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
unsigned long protection;
/* memory.low scaling, make sure we retry before OOM */
if (!sc->memcg_low_reclaim && low > min) {
protection = low;
sc->memcg_low_skipped = 1;
} else {
protection = min;
}
/* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection);
scan = lruvec_size - lruvec_size * protection /
(cgroup_size + 1);
/*
* Minimally target SWAP_CLUSTER_MAX pages to keep
* reclaim moving forwards, avoiding decrementing
* sc->priority further than desirable.
*/
scan = max(scan, SWAP_CLUSTER_MAX);
} else {
scan = lruvec_size;
}
scan >>= sc->priority;
/*
* If the cgroup's already been deleted, make sure to
* scrape out the remaining cache.
*/
if (!scan && !mem_cgroup_online(memcg))
scan = min(lruvec_size, SWAP_CLUSTER_MAX);
switch (scan_balance) {
case SCAN_EQUAL:
/* Scan lists relative to size */
break;
case SCAN_FRACT:
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
* Make sure we don't miss the last page on
* the offlined memory cgroups because of a
* round-off error.
*/
scan = mem_cgroup_online(memcg) ?
div64_u64(scan * fraction[file], denominator) :
DIV64_U64_ROUND_UP(scan * fraction[file],
denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
/* Scan one type exclusively */
if ((scan_balance == SCAN_FILE) != file)
scan = 0;
break;
default:
/* Look ma, no brain */
BUG();
}
nr[lru] = scan;
}
}
コメント†