--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,6 @@ NAME = Sneaky Weasel
# Comments in this file are targeted only to the developer, do not
# expect to learn how to build the kernel reading this file.
-CKVERSION = -ck1
-CKNAME = BFS Powered
-EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION)
-
# Do not:
# o use make's built-in rules and variables
# (this increases performance and avoids hard-to-debug behaviour);
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -64,6 +64,11 @@ static struct timer_list spusched_timer;
static struct timer_list spuloadavg_timer;
/*
+ * Priority of a normal, non-rt, non-niced process (aka nice level 0).
+ */
+#define NORMAL_PRIO 120
+
+/*
* Frequency of the spu scheduler tick. By default we do one SPU scheduler
* tick for every 10 CPU scheduler ticks.
*/
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1072,7 +1072,7 @@ endchoice
choice
depends on EXPERIMENTAL
- prompt "Memory split"
+ prompt "Memory split" if EXPERT
default VMSPLIT_3G
depends on X86_32
---help---
@@ -1092,17 +1092,17 @@ choice
option alone!
config VMSPLIT_3G
- bool "Default 896MB lowmem (3G/1G user/kernel split)"
+ bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
depends on !X86_PAE
- bool "1GB lowmem (3G/1G user/kernel split)"
+ bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
- bool "2GB lowmem (2G/2G user/kernel split)"
+ bool "2G/2G user/kernel split"
config VMSPLIT_2G_OPT
depends on !X86_PAE
- bool "2GB lowmem (2G/2G user/kernel split)"
+ bool "2G/2G user/kernel split (for full 2G low memory)"
config VMSPLIT_1G
- bool "3GB lowmem (1G/3G user/kernel split)"
+ bool "1G/3G user/kernel split"
endchoice
config PAGE_OFFSET
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -109,7 +109,7 @@ static int show_cpuinfo(struct seq_file
seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy * 10 /(50000/HZ)) % 100);
+ (c->loops_per_jiffy/(5000/HZ)) % 100);
#ifdef CONFIG_X86_64
if (c->x86_tlbsize > 0)
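Both the old and the new expression print the two fractional digits of BogoMIPS = loops_per_jiffy * HZ / 500000; the same substitution recurs in smpboot.c and init/calibrate.c below. A minimal standalone sketch, assuming HZ = 250 and a made-up loops_per_jiffy, showing the two forms agree:

#include <stdio.h>

#define HZ 250                          /* assumed for this sketch */

int main(void)
{
        unsigned long lpj = 4980736;    /* hypothetical loops_per_jiffy */

        /* whole part is lpj/(500000/HZ); both forms yield the hundredths */
        printf("old: %lu.%02lu\n", lpj / (500000 / HZ),
               (lpj * 10 / (50000 / HZ)) % 100);
        printf("new: %lu.%02lu\n", lpj / (500000 / HZ),
               (lpj / (5000 / HZ)) % 100);
        return 0;
}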
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -430,7 +430,7 @@ static void impress_friends(void)
"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
num_online_cpus(),
bogosum/(500000/HZ),
- (bogosum * 10/(50000/HZ))%100);
+ (bogosum/(5000/HZ))%100);
pr_debug("Before bogocount - setting activated=1.\n");
}
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -28,7 +28,6 @@
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
-#include <linux/sched.h>
#include <linux/syscore_ops.h>
#include <trace/events/power.h>
@@ -1704,12 +1703,6 @@ int __cpufreq_driver_target(struct cpufr
target_freq, relation);
if (cpu_online(policy->cpu) && cpufreq_driver->target)
retval = cpufreq_driver->target(policy, target_freq, relation);
- if (likely(retval != -EINVAL)) {
- if (target_freq == policy->max)
- cpu_nonscaling(policy->cpu);
- else
- cpu_scaling(policy->cpu);
- }
return retval;
}
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -29,8 +29,8 @@
* It helps to keep variable names smaller, simpler
*/
-#define DEF_FREQUENCY_UP_THRESHOLD (63)
-#define DEF_FREQUENCY_DOWN_THRESHOLD (26)
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
/*
* The polling frequency of this governor depends on the capability of
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -31,8 +31,8 @@
* It helps to keep variable names smaller, simpler
*/
-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (26)
-#define DEF_FREQUENCY_UP_THRESHOLD (63)
+#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
@@ -549,10 +549,10 @@ static void dbs_check_cpu(struct cpu_dbs
/*
* Every sampling_rate, we check, if current idle time is less
- * than 37% (default), then we try to increase frequency
+ * than 20% (default), then we try to increase frequency
* Every sampling_rate, we look for a the lowest
* frequency which can sustain the load while keeping idle time over
- * 63%. If such a frequency exist, we try to decrease to this frequency.
+ * 30%. If such a frequency exists, we try to decrease to this frequency.
*
* Any frequency increase takes it to the maximum frequency.
* Frequency reduction happens at minimum steps of
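With the restored defaults, ondemand ramps to the maximum frequency once load exceeds 80% (idle below 20%) and looks for a lower frequency whenever load drops below 80 - 10 = 70% (idle above 30%); conservative above uses the analogous 80/20 pair but steps gradually. A minimal decision sketch, not the governor code itself (the helper name and the main() driver are illustrative):

#include <stdio.h>

#define DEF_FREQUENCY_UP_THRESHOLD      (80)
#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)

/* +1: jump to max frequency, -1: try a lower one, 0: stay put */
static int ondemand_decision(unsigned int load_pct)
{
        if (load_pct > DEF_FREQUENCY_UP_THRESHOLD)
                return 1;               /* idle below 20%: go to maximum */
        if (load_pct < DEF_FREQUENCY_UP_THRESHOLD -
                       DEF_FREQUENCY_DOWN_DIFFERENTIAL)
                return -1;              /* idle above 30%: scale down */
        return 0;
}

int main(void)
{
        unsigned int load;

        for (load = 0; load <= 100; load += 25)
                printf("load %3u%% -> %d\n", load, ondemand_decision(load));
        return 0;
}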
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -365,7 +365,7 @@ static int proc_pid_stack(struct seq_fil
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
return sprintf(buffer, "%llu %llu %lu\n",
- (unsigned long long)tsk_seruntime(task),
+ (unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
}
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -64,8 +64,6 @@ static inline int task_ioprio_class(stru
static inline int task_nice_ioprio(struct task_struct *task)
{
- if (iso_task(task))
- return 0;
return (task_nice(task) + 20) / 5;
}
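The expression that remains maps the nice range -20..19 onto the eight best-effort I/O priority levels 0..7; only the SCHED_ISO special case goes away with the rest of the patchset. A standalone check of the mapping, purely illustrative:

#include <stdio.h>

/* mirrors (task_nice(task) + 20) / 5 from the hunk above */
static int nice_to_ioprio(int nice)     /* nice is -20..19 */
{
        return (nice + 20) / 5;
}

int main(void)
{
        int nice;

        for (nice = -20; nice <= 19; nice++)
                printf("nice %3d -> best-effort ioprio %d\n",
                       nice, nice_to_ioprio(nice));
        return 0;
}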
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -164,7 +164,7 @@ static inline u64 get_jiffies_64(void)
* Have the 32 bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
/*
* Change timeval to jiffies, trying to avoid the
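Going back to -300*HZ matches the comment: the 32-bit jiffies value starts five minutes short of overflow so wrap bugs surface early. A quick sketch, assuming HZ = 250, counting the ticks until the wrap:

#include <stdio.h>

#define HZ 250                          /* assumed for this sketch */
#define INITIAL_JIFFIES ((unsigned long)(unsigned int)(-300 * HZ))

int main(void)
{
        unsigned int j = INITIAL_JIFFIES;       /* 32-bit jiffies counter */
        unsigned long ticks = 0;

        while (j != 0) {                /* tick until the counter wraps */
                j++;
                ticks++;
        }
        printf("wraps after %lu ticks = %lu seconds\n", ticks, ticks / HZ);
        return 0;
}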
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -23,11 +23,8 @@ static inline int page_is_file_cache(str
static inline void
__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
- struct list_head *head, int tail)
+ struct list_head *head)
{
- if (tail)
- list_add_tail(&page->lru, head);
- else
list_add(&page->lru, head);
__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
mem_cgroup_add_lru_list(page, l);
@@ -36,13 +33,7 @@ __add_page_to_lru_list(struct zone *zone
static inline void
add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
- __add_page_to_lru_list(zone, page, l, &zone->lru[l].list, 0);
-}
-
-static inline void
-add_page_to_lru_list_tail(struct zone *zone, struct page *page, enum lru_list l)
-{
- __add_page_to_lru_list(zone, page, l, &zone->lru[l].list, 1);
+ __add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
}
static inline void
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -15,7 +15,6 @@
#include <linux/seqlock.h>
#include <linux/nodemask.h>
#include <linux/pageblock-flags.h>
-#include <linux/timer.h>
#include <generated/bounds.h>
#include <asm/atomic.h>
#include <asm/page.h>
@@ -163,14 +162,12 @@ enum zone_watermarks {
WMARK_MIN,
WMARK_LOW,
WMARK_HIGH,
- WMARK_LOTS,
NR_WMARK
};
#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
-#define lots_wmark_pages(z) (z->watermark[WMARK_LOTS])
struct per_cpu_pages {
int count; /* number of pages in the list */
@@ -644,7 +641,6 @@ typedef struct pglist_data {
wait_queue_head_t kswapd_wait;
struct task_struct *kswapd;
int kswapd_max_order;
- struct timer_list watermark_timer;
enum zone_type classzone_idx;
} pg_data_t;
--- a/include/linux/nfsd/stats.h
+++ b/include/linux/nfsd/stats.h
@@ -11,8 +11,8 @@
#include <linux/nfs4.h>
-/* thread usage wraps every one hundred thousand seconds (approx one day) */
-#define NFSD_USAGE_WRAP (HZ*100000)
+/* thread usage wraps every million seconds (approx one fortnight) */
+#define NFSD_USAGE_WRAP (HZ*1000000)
#ifdef __KERNEL__
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -458,8 +458,6 @@ int add_to_page_cache_locked(struct page
pgoff_t index, gfp_t gfp_mask);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
-int __add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask, int tail);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page);
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -39,8 +39,6 @@
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
-#define SCHED_IDLEPRIO SCHED_IDLE
-
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
@@ -270,6 +268,8 @@ extern asmlinkage void schedule_tail(str
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
+extern int runqueue_is_locked(int cpu);
+
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern void select_nohz_load_balancer(int stop_tick);
@@ -1226,12 +1226,9 @@ struct task_struct {
#ifdef CONFIG_SMP
struct task_struct *wake_entry;
+ int on_cpu;
#endif
-#if defined(CONFIG_SMP)
- bool on_cpu;
-#endif
-#endif
- bool on_rq;
+ int on_rq;
int prio, static_prio, normal_prio;
unsigned int rt_priority;
@@ -1575,42 +1572,6 @@ struct task_struct {
#endif
};
-extern int runqueue_is_locked(int cpu);
-static inline void cpu_scaling(int cpu)
-{
-}
-
-static inline void cpu_nonscaling(int cpu)
-{
-}
-#define tsk_seruntime(t) ((t)->se.sum_exec_runtime)
-#define tsk_rttimeout(t) ((t)->rt.timeout)
-
-static inline void tsk_cpus_current(struct task_struct *p)
-{
- p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-}
-
-static inline void print_scheduler_version(void)
-{
- printk(KERN_INFO"CFS CPU scheduler.\n");
-}
-
-static inline bool iso_task(struct task_struct *p)
-{
- return false;
-}
-
-static inline void remove_cpu(int cpu)
-{
-}
-
-/* Anyone feel like implementing this? */
-static inline int above_background_load(void)
-{
- return 1;
-}
-
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
@@ -1628,11 +1589,10 @@ static inline int above_background_load(
*/
#define MAX_USER_RT_PRIO 100
-#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1)
-#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
-#define NORMAL_PRIO DEFAULT_PRIO
+#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static inline int rt_prio(int prio)
{
@@ -1982,7 +1942,7 @@ extern unsigned long long
task_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
extern void sched_exec(void);
#else
#define sched_exec() {}
@@ -2611,7 +2571,7 @@ extern void signal_wake_up(struct task_s
*/
#ifdef CONFIG_SMP
-static inline int task_cpu(const struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
{
return task_thread_info(p)->cpu;
}
@@ -2620,12 +2580,12 @@ extern void set_task_cpu(struct task_str
#else
-static inline int task_cpu(const struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
{
return 0;
}
-static inline void set_task_cpu(struct task_struct *p, int cpu)
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}
@@ -2739,3 +2699,5 @@ static inline unsigned long rlimit_max(u
}
#endif /* __KERNEL__ */
+
+#endif
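The macro hunk above restores the mainline priority layout: RT priorities 0..99, nice levels occupying 100..139, and nice 0 sitting at 120, which is also the value the spufs NORMAL_PRIO define near the top of this patch hard-codes. A self-contained sketch of the arithmetic (nice_to_prio mirrors the kernel's NICE_TO_PRIO mapping):

#include <stdio.h>

#define MAX_USER_RT_PRIO        100
#define MAX_RT_PRIO             MAX_USER_RT_PRIO
#define MAX_PRIO                (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO            (MAX_RT_PRIO + 20)

/* nice -20..19 maps onto static priority 100..139 */
static int nice_to_prio(int nice)
{
        return MAX_RT_PRIO + nice + 20;
}

int main(void)
{
        printf("rt:     0..%d\n", MAX_RT_PRIO - 1);
        printf("normal: %d..%d\n", nice_to_prio(-20), nice_to_prio(19));
        printf("nice 0: %d (DEFAULT_PRIO=%d, MAX_PRIO=%d)\n",
               nice_to_prio(0), DEFAULT_PRIO, MAX_PRIO);
        return 0;
}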
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -201,7 +201,7 @@ struct swap_list_t {
int next; /* swapfile to be used next */
};
-/* Swap 50% full? */
+/* Swap 50% full? Release swapcache more aggressively.. */
#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
/* linux/mm/page_alloc.c */
@@ -215,7 +215,6 @@ extern unsigned int nr_free_pagecache_pa
/* linux/mm/swap.c */
-extern void ____lru_cache_add(struct page *, enum lru_list lru, int tail);
extern void __lru_cache_add(struct page *, enum lru_list lru);
extern void lru_cache_add_lru(struct page *, enum lru_list lru);
extern void lru_add_page_tail(struct zone* zone,
@@ -239,14 +238,9 @@ static inline void lru_cache_add_anon(st
__lru_cache_add(page, LRU_INACTIVE_ANON);
}
-static inline void lru_cache_add_file_tail(struct page *page, int tail)
-{
- ____lru_cache_add(page, LRU_INACTIVE_FILE, tail);
-}
-
static inline void lru_cache_add_file(struct page *page)
{
- ____lru_cache_add(page, LRU_INACTIVE_FILE, 0);
+ __lru_cache_add(page, LRU_INACTIVE_FILE);
}
/* LRU Isolation modes. */
@@ -356,10 +350,9 @@ extern void grab_swap_token(struct mm_st
extern void __put_swap_token(struct mm_struct *);
extern void disable_swap_token(struct mem_cgroup *memcg);
-/* Only allow swap token to have effect if swap is full */
static inline int has_swap_token(struct mm_struct *mm)
{
- return (mm == swap_token_mm && vm_swap_full());
+ return (mm == swap_token_mm);
}
static inline void put_swap_token(struct mm_struct *mm)
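Several later hunks (do_swap_page, scan_swap_map, free_swap_and_cache, shrink_page_list) hinge on vm_swap_full(): swap-cache entries are only reclaimed eagerly once more than half of swap is in use, and has_swap_token() once again applies regardless of how full swap is. A toy illustration of the predicate, with made-up page counts:

#include <stdio.h>

static long nr_swap_pages = 300;        /* free swap pages (hypothetical) */
static long total_swap_pages = 1000;    /* swap size in pages (hypothetical) */

#define vm_swap_full()  (nr_swap_pages * 2 < total_swap_pages)

int main(void)
{
        printf("700/1000 used: vm_swap_full() = %d\n", vm_swap_full());

        nr_swap_pages = 600;            /* only 400 of 1000 used */
        printf("400/1000 used: vm_swap_full() = %d\n", vm_swap_full());
        return 0;
}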
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -39,8 +39,8 @@ struct inet_hashinfo;
* If time > 4sec, it is "slow" path, no recycling is required,
* so that we select tick to get range about 4 seconds.
*/
-#if HZ <= 16 || HZ > 16384
-# error Unsupported: HZ <= 16 or HZ > 16384
+#if HZ <= 16 || HZ > 4096
+# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
@@ -55,12 +55,8 @@ struct inet_hashinfo;
# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 4096
-# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 8192
-# define INET_TWDR_RECYCLE_TICK (13 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#else
-# define INET_TWDR_RECYCLE_TICK (14 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
+# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#endif
/* TIME_WAIT reaping mechanism. */
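Dropping the HZ > 4096 rows keeps the recycle wheel at roughly four seconds: with 2^INET_TWDR_RECYCLE_SLOTS_LOG slots (the log is 5 in this header) and a tick of 2^(k + 2 - 5) jiffies for HZ <= 2^k, the wheel spans 32 * 2^(k-3) = 2^(k+2) jiffies, about 4*HZ. A back-of-the-envelope check for the HZ <= 1024 row (values restated here only so the sketch is standalone):

#include <stdio.h>

#define INET_TWDR_RECYCLE_SLOTS_LOG     5       /* as in this header */
#define INET_TWDR_RECYCLE_SLOTS         (1 << INET_TWDR_RECYCLE_SLOTS_LOG)

int main(void)
{
        unsigned int hz = 1024;                 /* HZ <= 2^10 row */
        unsigned int tick = 10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG;
        unsigned long slot = 1UL << tick;       /* jiffies per slot */
        unsigned long wheel = slot * INET_TWDR_RECYCLE_SLOTS;

        printf("slot = %lu jiffies, wheel = %lu jiffies (~%lu s at HZ=%u)\n",
               slot, wheel, wheel / hz, hz);
        return 0;
}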
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -269,7 +269,7 @@ void __cpuinit calibrate_delay(void)
if (!printed)
pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
lpj/(500000/HZ),
- (lpj * 10 /(50000 / HZ)) % 100, lpj);
+ (lpj/(5000/HZ)) % 100, lpj);
loops_per_jiffy = lpj;
printed = true;
--- a/init/main.c
+++ b/init/main.c
@@ -775,7 +775,6 @@ static noinline int init_post(void)
system_state = SYSTEM_RUNNING;
numa_default_policy();
- print_scheduler_version();
current->signal->flags |= SIGNAL_UNKILLABLE;
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_1000
+ default HZ_250
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -23,14 +23,13 @@ choice
with lots of processors that may show reduced performance if
too many timer interrupts are occurring.
- config HZ_250_NODEFAULT
+ config HZ_250
bool "250 HZ"
help
- 250 HZ is a lousy compromise choice allowing server interactivity
- while also showing desktop throughput and no extra power saving on
- laptops. No good for anything.
-
- Recommend 100 or 1000 instead.
+ 250 Hz is a good compromise choice allowing server performance
+ while also showing good interactive responsiveness even
+ on SMP and NUMA systems. If you are going to be using NTSC video
+ or multimedia, select 300Hz instead.
config HZ_300
bool "300 HZ"
@@ -44,82 +43,16 @@ choice
bool "1000 HZ"
help
1000 Hz is the preferred choice for desktop systems and other
- systems requiring fast interactive responses to events. Laptops
- can also benefit from this choice without sacrificing battery life
- if dynticks is also enabled.
-
- config HZ_1500
- bool "1500 HZ"
- help
- 1500 Hz is an insane value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_2000
- bool "2000 HZ"
- help
- 2000 Hz is an insane value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_3000
- bool "3000 HZ"
- help
- 3000 Hz is an insane value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_4000
- bool "4000 HZ"
- help
- 4000 Hz is an insane value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_5000
- bool "5000 HZ"
- help
- 5000 Hz is an obscene value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_7500
- bool "7500 HZ"
- help
- 7500 Hz is an obscene value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
- config HZ_10000
- bool "10000 HZ"
- help
- 10000 Hz is an obscene value to use to run broken software that is Hz
- limited.
-
- Being over 1000, driver breakage is likely.
-
+ systems requiring fast interactive responses to events.
endchoice
config HZ
int
default 100 if HZ_100
- default 250 if HZ_250_NODEFAULT
+ default 250 if HZ_250
default 300 if HZ_300
default 1000 if HZ_1000
- default 1500 if HZ_1500
- default 2000 if HZ_2000
- default 3000 if HZ_3000
- default 4000 if HZ_4000
- default 5000 if HZ_5000
- default 7500 if HZ_7500
- default 10000 if HZ_10000
config SCHED_HRTICK
def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
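As a rough feel for the trade-off in the help texts above, the tick period and per-CPU timer interrupt rate for each remaining choice (illustrative arithmetic only):

#include <stdio.h>

int main(void)
{
        int hz[] = { 100, 250, 300, 1000 };
        int i;

        for (i = 0; i < 4; i++)
                printf("HZ=%4d -> tick every %6.2f ms, %4d timer irqs/s per CPU\n",
                       hz[i], 1000.0 / hz[i], hz[i]);
        return 0;
}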
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,7 +1,7 @@
choice
prompt "Preemption Model"
- default PREEMPT
+ default PREEMPT_NONE
config PREEMPT_NONE
bool "No Forced Preemption (Server)"
@@ -17,7 +17,7 @@ config PREEMPT_NONE
latencies.
config PREEMPT_VOLUNTARY
- bool "Voluntary Kernel Preemption (Nothing)"
+ bool "Voluntary Kernel Preemption (Desktop)"
help
This option reduces the latency of the kernel by adding more
"explicit preemption points" to the kernel code. These new
@@ -31,8 +31,7 @@ config PREEMPT_VOLUNTARY
applications to run more 'smoothly' even when the system is
under load.
- Select this for no system in particular (choose Preemptible
- instead on a desktop if you know what's good for you).
+ Select this if you are building a kernel for a desktop system.
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -128,7 +128,7 @@ int __delayacct_add_tsk(struct taskstats
*/
t1 = tsk->sched_info.pcount;
t2 = tsk->sched_info.run_delay;
- t3 = tsk_seruntime(tsk);
+ t3 = tsk->se.sum_exec_runtime;
d->cpu_count += t1;
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -132,7 +132,7 @@ static void __exit_signal(struct task_st
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
- sig->sum_sched_runtime += tsk_seruntime(tsk);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
}
sig->nr_threads--;
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -250,7 +250,7 @@ void thread_group_cputime(struct task_st
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += tsk_seruntime(t);
+ times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
rcu_read_unlock();
@@ -512,7 +512,7 @@ static void cleanup_timers(struct list_h
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk_seruntime(tsk));
+ tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
@@ -522,7 +522,7 @@ void posix_cpu_timers_exit_group(struct
cleanup_timers(tsk->signal->cpu_timers,
cputime_add(tsk->utime, sig->utime),
cputime_add(tsk->stime, sig->stime),
- tsk_seruntime(tsk) + sig->sum_sched_runtime);
+ tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -953,7 +953,7 @@ static void check_thread_timers(struct t
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || tsk_seruntime(tsk) < t->expires.sched) {
+ if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
tsk->cputime_expires.sched_exp = t->expires.sched;
break;
}
@@ -970,7 +970,7 @@ static void check_thread_timers(struct t
ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
if (hard != RLIM_INFINITY &&
- tsk_rttimeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+ tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -978,7 +978,7 @@ static void check_thread_timers(struct t
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
- if (tsk_rttimeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
+ if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
/*
* At the soft limit, send a SIGXCPU every second.
*/
@@ -1280,7 +1280,7 @@ static inline int fastpath_timer_check(s
struct task_cputime task_sample = {
.utime = tsk->utime,
.stime = tsk->stime,
- .sum_exec_runtime = tsk_seruntime(tsk)
+ .sum_exec_runtime = tsk->se.sum_exec_runtime
};
if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
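The RLIMIT_RTTIME checks above convert a limit given in microseconds into scheduler ticks with DIV_ROUND_UP(limit, USEC_PER_SEC/HZ), now compared against the plain rt.timeout field. A worked example, assuming HZ = 250 and a hypothetical soft limit:

#include <stdio.h>

#define HZ              250             /* assumed for this sketch */
#define USEC_PER_SEC    1000000L
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned long soft_us = 950000; /* hypothetical 0.95 s soft limit */
        unsigned long ticks = DIV_ROUND_UP(soft_us, USEC_PER_SEC / HZ);

        /* SIGXCPU starts firing once rt.timeout exceeds this many ticks */
        printf("%lu us -> %lu ticks at HZ=%d\n", soft_us, ticks, HZ);
        return 0;
}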
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9449,3 +9449,4 @@ struct cgroup_subsys cpuacct_subsys = {
.subsys_id = cpuacct_subsys_id,
};
#endif /* CONFIG_CGROUP_CPUACCT */
+
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -121,7 +121,7 @@ static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused three = 3;
static unsigned long one_ul = 1;
-static int __maybe_unused one_hundred = 100;
+static int one_hundred = 100;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
@@ -258,7 +258,7 @@ static struct ctl_table root_table[] = {
{ }
};
-#if defined(CONFIG_SCHED_DEBUG)
+#ifdef CONFIG_SCHED_DEBUG
static int min_sched_granularity_ns = 100000; /* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_wakeup_granularity_ns; /* 0 usecs */
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -484,8 +484,8 @@ out:
}
EXPORT_SYMBOL(add_to_page_cache_locked);
-int __add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask, int tail)
+int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
+ pgoff_t offset, gfp_t gfp_mask)
{
int ret;
@@ -501,18 +501,12 @@ int __add_to_page_cache_lru(struct page
ret = add_to_page_cache(page, mapping, offset, gfp_mask);
if (ret == 0) {
if (page_is_file_cache(page))
- lru_cache_add_file_tail(page, tail);
+ lru_cache_add_file(page);
else
lru_cache_add_anon(page);
}
return ret;
}
-
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask)
-{
- return __add_to_page_cache_lru(page, mapping, offset, gfp_mask, 0);
-}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
#ifdef CONFIG_NUMA
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3003,7 +3003,7 @@ static int do_swap_page(struct mm_struct
mem_cgroup_commit_charge_swapin(page, ptr);
swap_free(entry);
- if ((vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+ if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page);
unlock_page(page);
if (swapcache) {
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -78,7 +78,7 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
-int vm_dirty_ratio = 1;
+int vm_dirty_ratio = 20;
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
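Raising vm_dirty_ratio back to 20 means a dirtying task only gets pushed into writeback once dirty pages approach a fifth of dirtyable memory, rather than 1%. A rough calculation with an assumed 8 GiB of dirtyable memory:

#include <stdio.h>

int main(void)
{
        unsigned long dirtyable_kib = 8UL * 1024 * 1024;        /* assumed ~8 GiB */
        int vm_dirty_ratio = 20;

        printf("writeback starts around %lu MiB of dirty pages\n",
               dirtyable_kib * vm_dirty_ratio / 100 / 1024);
        return 0;
}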
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,7 +17,6 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
-#include <linux/swap.h>
/*
* Initialise a struct file's readahead state. Assumes that the caller has
@@ -108,7 +107,7 @@ int read_cache_pages(struct address_spac
EXPORT_SYMBOL(read_cache_pages);
static int read_pages(struct address_space *mapping, struct file *filp,
- struct list_head *pages, unsigned nr_pages, int tail)
+ struct list_head *pages, unsigned nr_pages)
{
struct blk_plug plug;
unsigned page_idx;
@@ -126,8 +125,8 @@ static int read_pages(struct address_spa
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_to_page(pages);
list_del(&page->lru);
- if (!__add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL, tail)) {
+ if (!add_to_page_cache_lru(page, mapping,
+ page->index, GFP_KERNEL)) {
mapping->a_ops->readpage(filp, page);
}
page_cache_release(page);
@@ -140,28 +139,6 @@ out:
return ret;
}
-static inline int nr_mapped(void)
-{
- return global_page_state(NR_FILE_MAPPED) +
- global_page_state(NR_ANON_PAGES);
-}
-
-/*
- * This examines how large in pages a file size is and returns 1 if it is
- * more than half the unmapped ram. Avoid doing read_page_state which is
- * expensive unless we already know it is likely to be large enough.
- */
-static int large_isize(unsigned long nr_pages)
-{
- if (nr_pages * 6 > vm_total_pages) {
- unsigned long unmapped_ram = vm_total_pages - nr_mapped();
-
- if (nr_pages * 2 > unmapped_ram)
- return 1;
- }
- return 0;
-}
-
/*
* __do_page_cache_readahead() actually reads a chunk of disk. It allocates all
* the pages first, then submits them all for I/O. This avoids the very bad
@@ -219,8 +196,7 @@ __do_page_cache_readahead(struct address
* will then handle the error.
*/
if (ret)
- read_pages(mapping, filp, &page_pool, ret,
- large_isize(end_index));
+ read_pages(mapping, filp, &page_pool, ret);
BUG_ON(!list_empty(&page_pool));
out:
return ret;
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -371,23 +371,15 @@ void mark_page_accessed(struct page *pag
EXPORT_SYMBOL(mark_page_accessed);
-void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, int tail);
-
-void ____lru_cache_add(struct page *page, enum lru_list lru, int tail)
+void __lru_cache_add(struct page *page, enum lru_list lru)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
page_cache_get(page);
if (!pagevec_add(pvec, page))
- ______pagevec_lru_add(pvec, lru, tail);
+ ____pagevec_lru_add(pvec, lru);
put_cpu_var(lru_add_pvecs);
}
-EXPORT_SYMBOL(____lru_cache_add);
-
-void __lru_cache_add(struct page *page, enum lru_list lru)
-{
- ____lru_cache_add(page, lru, 0);
-}
EXPORT_SYMBOL(__lru_cache_add);
/**
@@ -395,7 +387,7 @@ EXPORT_SYMBOL(__lru_cache_add);
* @page: the page to be added to the LRU.
* @lru: the LRU list to which the page is added.
*/
-void __lru_cache_add_lru(struct page *page, enum lru_list lru, int tail)
+void lru_cache_add_lru(struct page *page, enum lru_list lru)
{
if (PageActive(page)) {
VM_BUG_ON(PageUnevictable(page));
@@ -406,12 +398,7 @@ void __lru_cache_add_lru(struct page *pa
}
VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
- ____lru_cache_add(page, lru, tail);
-}
-
-void lru_cache_add_lru(struct page *page, enum lru_list lru)
-{
- __lru_cache_add_lru(page, lru, 0);
+ __lru_cache_add(page, lru);
}
/**
@@ -698,7 +685,7 @@ void lru_add_page_tail(struct zone* zone
head = page->lru.prev;
else
head = &zone->lru[lru].list;
- __add_page_to_lru_list(zone, page_tail, lru, head, 0);
+ __add_page_to_lru_list(zone, page_tail, lru, head);
} else {
SetPageUnevictable(page_tail);
add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
@@ -727,18 +714,13 @@ static void ____pagevec_lru_add_fn(struc
* Add the passed pages to the LRU, then drop the caller's refcount
* on them. Reinitialises the caller's pagevec.
*/
-void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, int tail)
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
{
VM_BUG_ON(is_unevictable_lru(lru));
pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
}
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
-{
- ______pagevec_lru_add(pvec, lru, 0);
-}
-
EXPORT_SYMBOL(____pagevec_lru_add);
/*
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -289,7 +289,7 @@ checks:
scan_base = offset = si->lowest_bit;
/* reuse swap entry of cache-only swap if not busy. */
- if (si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
int swap_was_freed;
spin_unlock(&swap_lock);
swap_was_freed = __try_to_reclaim_swap(si, offset);
@@ -378,7 +378,7 @@ scan:
spin_lock(&swap_lock);
goto checks;
}
- if (si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
spin_lock(&swap_lock);
goto checks;
}
@@ -393,7 +393,7 @@ scan:
spin_lock(&swap_lock);
goto checks;
}
- if (si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
spin_lock(&swap_lock);
goto checks;
}
@@ -707,7 +707,8 @@ int free_swap_and_cache(swp_entry_t entr
* Not mapped elsewhere, or swap space full? Free it!
* Also recheck PageSwapCache now page is locked (above).
*/
- if (PageSwapCache(page) && !PageWriteback(page)) {
+ if (PageSwapCache(page) && !PageWriteback(page) &&
+ (!page_mapped(page) || vm_swap_full())) {
delete_from_swap_cache(page);
SetPageDirty(page);
}
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -37,7 +37,6 @@
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
-#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
@@ -149,7 +148,7 @@ struct scan_control {
/*
* From 0 .. 100. Higher means more swappy.
*/
-int vm_swappiness;
+int vm_swappiness = 60;
long vm_total_pages; /* The total number of pages which the VM controls */
static LIST_HEAD(shrinker_list);
@@ -933,7 +932,7 @@ cull_mlocked:
activate_locked:
/* Not a candidate for swapping, so reclaim swap space. */
- if (PageSwapCache(page))
+ if (PageSwapCache(page) && vm_swap_full())
try_to_free_swap(page);
VM_BUG_ON(PageActive(page));
SetPageActive(page);
@@ -1987,35 +1986,6 @@ restart:
}
/*
- * Helper functions to adjust nice level of kswapd, based on the priority of
- * the task (p) that called it. If it is already higher priority we do not
- * demote its nice level since it is still working on behalf of a higher
- * priority task. With kernel threads we leave it at nice 0.
- *
- * We don't ever run kswapd real time, so if a real time task calls kswapd we
- * set it to highest SCHED_NORMAL priority.
- */
-static inline int effective_sc_prio(struct task_struct *p)
-{
- if (likely(p->mm)) {
- if (rt_task(p))
- return -20;
- if (p->policy == SCHED_IDLEPRIO)
- return 19;
- return task_nice(p);
- }
- return 0;
-}
-
-static void set_kswapd_nice(struct task_struct *kswapd, int active)
-{
- long nice = effective_sc_prio(current);
-
- if (task_nice(kswapd) > nice || !active)
- set_user_nice(kswapd, nice);
-}
-
-/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
* request.
@@ -2736,8 +2706,6 @@ static void kswapd_try_to_sleep(pg_data_
finish_wait(&pgdat->kswapd_wait, &wait);
}
-#define WT_EXPIRY (HZ * 5) /* Time to wakeup watermark_timer */
-
/*
* The background pageout daemon, started as a kernel thread
* from the init process.
@@ -2789,9 +2757,6 @@ static int kswapd(void *p)
for ( ; ; ) {
int ret;
- /* kswapd has been busy so delay watermark_timer */
- mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY);
-
/*
* If the last balance_pgdat was unsuccessful it's unlikely a
* new request of a similar or harder type will succeed soon
@@ -2841,7 +2806,6 @@ static int kswapd(void *p)
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
{
pg_data_t *pgdat;
- int active;
if (!populated_zone(zone))
return;
@@ -2853,9 +2817,7 @@ void wakeup_kswapd(struct zone *zone, in
pgdat->kswapd_max_order = order;
pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
}
- active = waitqueue_active(&pgdat->kswapd_wait);
- set_kswapd_nice(pgdat->kswapd, active);
- if (!active)
+ if (!waitqueue_active(&pgdat->kswapd_wait))
return;
if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
return;
@@ -2968,57 +2930,20 @@ static int __devinit cpu_callback(struct
}
/*
- * We wake up kswapd every WT_EXPIRY till free ram is above pages_lots
- */
-static void watermark_wakeup(unsigned long data)
-{
- pg_data_t *pgdat = (pg_data_t *)data;
- struct timer_list *wt = &pgdat->watermark_timer;
- int i;
-
- if (!waitqueue_active(&pgdat->kswapd_wait) || above_background_load())
- goto out;
- for (i = pgdat->nr_zones - 1; i >= 0; i--) {
- struct zone *z = pgdat->node_zones + i;
-
- if (!populated_zone(z) || is_highmem(z)) {
- /* We are better off leaving highmem full */
- continue;
- }
- if (!zone_watermark_ok(z, 0, lots_wmark_pages(z), 0, 0)) {
- wake_up_interruptible(&pgdat->kswapd_wait);
- goto out;
- }
- }
-out:
- mod_timer(wt, jiffies + WT_EXPIRY);
- return;
-}
-
-/*
* This kswapd start function will be called by init and node-hot-add.
* On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
*/
int kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
- struct timer_list *wt;
int ret = 0;
if (pgdat->kswapd)
return 0;
- wt = &pgdat->watermark_timer;
- init_timer(wt);
- wt->data = (unsigned long)pgdat;
- wt->function = watermark_wakeup;
- wt->expires = jiffies + WT_EXPIRY;
- add_timer(wt);
-
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
if (IS_ERR(pgdat->kswapd)) {
/* failure at boot is fatal */
- del_timer(wt);
BUG_ON(system_state == SYSTEM_BOOTING);
printk("Failed to start kswapd on node %d\n",nid);
ret = -1;