diff options
Diffstat (limited to 'kernel/power')
-rw-r--r-- | kernel/power/Kconfig | 289 | ||||
-rw-r--r-- | kernel/power/Makefile | 16 | ||||
-rw-r--r-- | kernel/power/autosleep.c | 127 | ||||
-rw-r--r-- | kernel/power/block_io.c | 103 | ||||
-rw-r--r-- | kernel/power/console.c | 33 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 1306 | ||||
-rw-r--r-- | kernel/power/main.c | 661 | ||||
-rw-r--r-- | kernel/power/power.h | 310 | ||||
-rw-r--r-- | kernel/power/poweroff.c | 46 | ||||
-rw-r--r-- | kernel/power/process.c | 220 | ||||
-rw-r--r-- | kernel/power/qos.c | 538 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 2433 | ||||
-rw-r--r-- | kernel/power/suspend.c | 437 | ||||
-rw-r--r-- | kernel/power/suspend_test.c | 188 | ||||
-rw-r--r-- | kernel/power/suspend_time.c | 111 | ||||
-rw-r--r-- | kernel/power/swap.c | 2064 | ||||
-rw-r--r-- | kernel/power/user.c | 464 | ||||
-rw-r--r-- | kernel/power/wakelock.c | 259 |
18 files changed, 9605 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 00000000..a6fd57a9 --- /dev/null +++ b/kernel/power/Kconfig @@ -0,0 +1,289 @@ +config SUSPEND + bool "Suspend to RAM and standby" + depends on ARCH_SUSPEND_POSSIBLE + default y + ---help--- + Allow the system to enter sleep states in which main memory is + powered and thus its contents are preserved, such as the + suspend-to-RAM state (e.g. the ACPI S3 state). + +config SUSPEND_FREEZER + bool "Enable freezer for suspend to RAM/standby" \ + if ARCH_WANTS_FREEZER_CONTROL || BROKEN + depends on SUSPEND + default y + help + This allows you to turn off the freezer for suspend. If this is + done, no tasks are frozen for suspend to RAM/standby. + + Turning OFF this setting is NOT recommended! If in doubt, say Y. + +config HAS_WAKELOCK + bool + default y + +config WAKELOCK + bool + default y + +config HIBERNATE_CALLBACKS + bool + +config HIBERNATION + bool "Hibernation (aka 'suspend to disk')" + depends on SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATE_CALLBACKS + select LZO_COMPRESS + select LZO_DECOMPRESS + select LZ4_COMPRESS + select LZ4_DECOMPRESS + select CRC32 + ---help--- + Enable the suspend to disk (STD) functionality, which is usually + called "hibernation" in user interfaces. STD checkpoints the + system and powers it off; and restores that checkpoint on reboot. + + You can suspend your machine with 'echo disk > /sys/power/state' + after placing resume=/dev/swappartition on the kernel command line + in your bootloader's configuration file. + + Alternatively, you can use the additional userland tools available + from <http://suspend.sf.net>. + + In principle it does not require ACPI or APM, although for example + ACPI will be used for the final steps when it is available. One + of the reasons to use software suspend is that the firmware hooks + for suspend states like suspend-to-RAM (STR) often don't work very + well with Linux. + + It creates an image which is saved in your active swap. Upon the next + boot, pass the 'resume=/dev/swappartition' argument to the kernel to + have it detect the saved image, restore memory state from it, and + continue to run as before. If you do not want the previous state to + be reloaded, then use the 'noresume' kernel command line argument. + Note, however, that fsck will be run on your filesystems and you will + need to run mkswap against the swap partition used for the suspend. + + It also works with swap files to a limited extent (for details see + <file:Documentation/power/swsusp-and-swap-files.txt>). + + Right now you may boot without resuming and resume later but in the + meantime you cannot use the swap partition(s)/file(s) involved in + suspending. Also in this case you must not use the filesystems + that were mounted before the suspend. In particular, you MUST NOT + MOUNT any journaled filesystems mounted before the suspend or they + will get corrupted in a nasty way. + + For more information take a look at <file:Documentation/power/swsusp.txt>. + +config ARCH_SAVE_PAGE_KEYS + bool + +config PM_STD_PARTITION + string "Default resume partition" + depends on HIBERNATION + default "" + ---help--- + The default resume partition is the partition that the suspend- + to-disk implementation will look for a suspended disk image. + + The partition specified here will be different for almost every user. + It should be a valid swap partition (at least for now) that is turned + on before suspending. + + The partition specified can be overridden by specifying: + + resume=/dev/<other device> + + which will set the resume partition to the device specified. + + Note there is currently not a way to specify which device to save the + suspended image to. It will simply pick the first available swap + device. + +config PM_SLEEP + def_bool y + depends on SUSPEND || HIBERNATE_CALLBACKS + +config PM_SLEEP_SMP + def_bool y + depends on SMP + depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE + depends on PM_SLEEP + select HOTPLUG + select HOTPLUG_CPU + +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + +config PM_WAKELOCKS + bool "User space wakeup sources interface" + depends on PM_SLEEP + default n + ---help--- + Allow user space to create, activate and deactivate wakeup source + objects with the help of a sysfs-based interface. + +config PM_WAKELOCKS_LIMIT + int "Maximum number of user space wakeup sources (0 = no limit)" + range 0 100000 + default 100 + depends on PM_WAKELOCKS + +config PM_WAKELOCKS_GC + bool "Garbage collector for user space wakeup sources" + depends on PM_WAKELOCKS + default y + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on !IA64_HP_SIM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. + +config PM + def_bool y + depends on PM_SLEEP || PM_RUNTIME + +config PM_DEBUG + bool "Power Management Debug Support" + depends on PM + ---help--- + This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_ADVANCED_DEBUG + bool "Extra PM attributes in sysfs for low-level debugging/testing" + depends on PM_DEBUG + ---help--- + Add extra sysfs attributes allowing one to access some Power Management + fields of device objects from user space. If you are not a kernel + developer interested in debugging/testing Power Management, say "no". + +config PM_TEST_SUSPEND + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_CLASS=y + ---help--- + This option will let you suspend your machine during bootup, and + make it wake up a few seconds later using an RTC wakeup alarm. + Enable this with a kernel parameter like "test_suspend=mem". + + You probably want to have your system's RTC driver statically + linked, ensuring that it's available when this test runs. + +config CAN_PM_TRACE + def_bool y + depends on PM_DEBUG && PM_SLEEP + +config PM_TRACE + bool + help + This enables code to save the last PM event point across + reboot. The architecture needs to support this, x86 for + example does by saving things in the RTC, see below. + + The architecture specific code must provide the extern + functions from <linux/resume-trace.h> as well as the + <asm/resume-trace.h> header with a TRACE_RESUME() macro. + + The way the information is presented is architecture- + dependent, x86 will print the information during a + late_initcall. + +config PM_TRACE_RTC + bool "Suspend/resume event tracing" + depends on CAN_PM_TRACE + depends on X86 + select PM_TRACE + ---help--- + This enables some cheesy code to save the last PM event point in the + RTC across reboots, so that you can debug a machine that just hangs + during suspend (or more commonly, during resume). + + To use this debugging feature you should attempt to suspend the + machine, reboot it and then run + + dmesg -s 1000000 | grep 'hash matches' + + CAUTION: this option will cause your machine's real-time clock to be + set to an invalid time after a resume. + +config APM_EMULATION + tristate "Advanced Power Management Emulation" + depends on PM && SYS_SUPPORTS_APM_EMULATION + help + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + In order to use APM, you will need supporting software. For location + and more information, read <file:Documentation/power/apm-acpi.txt> + and the Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + +config ARCH_HAS_OPP + bool + +config PM_OPP + bool "Operating Performance Point (OPP) Layer library" + depends on ARCH_HAS_OPP + ---help--- + SOCs have a standard set of tuples consisting of frequency and + voltage pairs that the device will support per voltage domain. This + is called Operating Performance Point or OPP. The actual definitions + of OPP varies over silicon within the same family of devices. + + OPP layer organizes the data internally using device pointers + representing individual voltage domains and provides SOC + implementations a ready to use framework to manage OPPs. + For more information, read <file:Documentation/power/opp.txt> + +config PM_CLK + def_bool y + depends on PM && HAVE_CLK + +config PM_GENERIC_DOMAINS + bool + depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS + +config CPU_PM + bool + depends on SUSPEND || CPU_IDLE + +config SUSPEND_TIME + bool "Log time spent in suspend" + ---help--- + Prints the time spent in suspend in the kernel log, and + keeps statistics on the time spent in suspend in + /sys/kernel/debug/suspend_time diff --git a/kernel/power/Makefile b/kernel/power/Makefile new file mode 100644 index 00000000..8450b85d --- /dev/null +++ b/kernel/power/Makefile @@ -0,0 +1,16 @@ + +ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG + +obj-y += qos.o +obj-$(CONFIG_PM) += main.o +obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o +obj-$(CONFIG_FREEZER) += process.o +obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ + block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o +obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o +obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o + +obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 00000000..ca304046 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,127 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + */ + +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/pm_wakeup.h> + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. + */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occured for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); + queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq = alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c new file mode 100644 index 00000000..d09dd10c --- /dev/null +++ b/kernel/power/block_io.c @@ -0,0 +1,103 @@ +/* + * This file provides functions for block I/O operations on swap/file. + * + * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + */ + +#include <linux/bio.h> +#include <linux/kernel.h> +#include <linux/pagemap.h> +#include <linux/swap.h> + +#include "power.h" + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * @bio_chain: list of pending biod (for async reading) + * + * Straight from the textbook - allocate and initialize the bio. + * If we're reading, make sure the page is marked as dirty. + * Then submit it and, if @bio_chain == NULL, wait. + */ +static int submit(int rw, struct block_device *bdev, sector_t sector, + struct page *page, struct bio **bio_chain) +{ + const int bio_rw = rw | REQ_SYNC; + struct bio *bio; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_end_io = end_swap_bio_read; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", + (unsigned long long)sector); + bio_put(bio); + return -EFAULT; + } + + lock_page(page); + bio_get(bio); + + if (bio_chain == NULL) { + submit_bio(bio_rw, bio); + wait_on_page_locked(page); + if (rw == READ) + bio_set_pages_dirty(bio); + bio_put(bio); + } else { + if (rw == READ) + get_page(page); /* These pages are freed later */ + bio->bi_private = *bio_chain; + *bio_chain = bio; + submit_bio(bio_rw, bio); + } + return 0; +} + +int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), + virt_to_page(addr), bio_chain); +} + +int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) +{ + return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), + virt_to_page(addr), bio_chain); +} + +int hib_wait_on_bio_chain(struct bio **bio_chain) +{ + struct bio *bio; + struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + if (bio == NULL) + return 0; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + bio = next_bio; + } + *bio_chain = NULL; + return ret; +} diff --git a/kernel/power/console.c b/kernel/power/console.c new file mode 100644 index 00000000..b1dc4564 --- /dev/null +++ b/kernel/power/console.c @@ -0,0 +1,33 @@ +/* + * Functions for saving/restoring console. + * + * Originally from swsusp. + */ + +#include <linux/vt_kern.h> +#include <linux/kbd_kern.h> +#include <linux/vt.h> +#include <linux/module.h> +#include "power.h" + +#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) + +static int orig_fgconsole, orig_kmsg; + +int pm_prepare_console(void) +{ + orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); + if (orig_fgconsole < 0) + return 1; + + orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); + return 0; +} + +void pm_restore_console(void) +{ + if (orig_fgconsole >= 0) { + vt_move_to_console(orig_fgconsole, 0); + vt_kmsg_redirect(orig_kmsg); + } +} diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c new file mode 100644 index 00000000..ad3ed9eb --- /dev/null +++ b/kernel/power/hibernate.c @@ -0,0 +1,1306 @@ +/* + * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz> + * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/export.h> +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/device.h> +#include <linux/async.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pm.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <linux/gfp.h> +#include <linux/syscore_ops.h> +#include <linux/ctype.h> +#include <linux/genhd.h> +#include <scsi/scsi_scan.h> +#include <mach/hardware.h> +#include "power.h" +#include "../../drivers/char/wmt-pwm.h" + +#define CONFIG_WMT_KILLER 1 +//#undef CONFIG_WMT_KILLER +#define MAY_SWAP 0x2 + +static int nocompress; +static int noresume; +static int resume_wait; +static int resume_delay; +char resume_file[64] = CONFIG_PM_STD_PARTITION; +dev_t swsusp_resume_device; +sector_t swsusp_resume_block; +int in_suspend __nosavedata; +extern struct mutex wmt_lock; +extern int wmt_swap; +extern u32 __nosave_backup_phys; +extern u32 __nosave_begin_phys; +extern u32 __nosave_end_phys; +static int stress_resume_times;//record stress test times. +extern int wmt_getsyspara(char *varname, unsigned char *varval, int *varlen); + +enum { + HIBERNATION_INVALID, + HIBERNATION_PLATFORM, + HIBERNATION_SHUTDOWN, + HIBERNATION_REBOOT, + /* keep last */ + __HIBERNATION_AFTER_LAST +}; +#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) +#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) + +static int hibernation_mode = HIBERNATION_SHUTDOWN; + +bool freezer_test_done; + +static const struct platform_hibernation_ops *hibernation_ops; +extern void wmt_resume_notify(void); +extern void lcd_enable_signal(int enable); +static int wmt_setswap(void) +{ + int varlen = 5; + char std_env_val[20] = "0"; + unsigned int std; + if (wmt_getsyspara("wmt.std.param", std_env_val, &varlen) == 0) { + sscanf(std_env_val, "%X", &std); + if (std & MAY_SWAP) + return 1; + return 0; + } else { + printk(KERN_ALERT "##Warning: \"wmt.std.param\" not find\n"); + printk(KERN_ALERT "Close may_swap function"); + } + return 0; +} + +/** + * hibernation_set_ops - Set the global hibernate operations. + * @ops: Hibernation operations to use in subsequent hibernation transitions. + */ +void hibernation_set_ops(const struct platform_hibernation_ops *ops) +{ + if (ops && !(ops->begin && ops->end && ops->pre_snapshot + && ops->prepare && ops->finish && ops->enter && ops->pre_restore + && ops->restore_cleanup && ops->leave)) { + WARN_ON(1); + return; + } + lock_system_sleep(); + hibernation_ops = ops; + if (ops) + hibernation_mode = HIBERNATION_PLATFORM; + else if (hibernation_mode == HIBERNATION_PLATFORM) + hibernation_mode = HIBERNATION_SHUTDOWN; + + unlock_system_sleep(); +} + +static bool entering_platform_hibernation; + +bool system_entering_hibernation(void) +{ + return entering_platform_hibernation; +} +EXPORT_SYMBOL(system_entering_hibernation); + +#ifdef CONFIG_PM_DEBUG +static void hibernation_debug_sleep(void) +{ + printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + mdelay(5000); +} + +static int hibernation_test(int level) +{ + if (pm_test_level == level) { + hibernation_debug_sleep(); + return 1; + } + return 0; +} +#else /* !CONFIG_PM_DEBUG */ +static int hibernation_test(int level) { return 0; } +#endif /* !CONFIG_PM_DEBUG */ + +/** + * platform_begin - Call platform to start hibernation. + * @platform_mode: Whether or not to use the platform driver. + */ +static int platform_begin(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->begin() : 0; +} + +/** + * platform_end - Call platform to finish transition to the working state. + * @platform_mode: Whether or not to use the platform driver. + */ +static void platform_end(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->end(); +} + +/** + * platform_pre_snapshot - Call platform to prepare the machine for hibernation. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to prepare the system for creating a hibernate image, + * if so configured, and return an error code if that fails. + */ + +static int platform_pre_snapshot(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_snapshot() : 0; +} + +/** + * platform_leave - Call platform to prepare a transition to the working state. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver prepare to prepare the machine for switching to the + * normal mode of operation. + * + * This routine is called on one CPU with interrupts disabled. + */ +static void platform_leave(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->leave(); +} + +/** + * platform_finish - Call platform to switch the system to the working state. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to switch the machine to the normal mode of + * operation. + * + * This routine must be called after platform_prepare(). + */ +static void platform_finish(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->finish(); +} + +/** + * platform_pre_restore - Prepare for hibernate image restoration. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to prepare the system for resume from a hibernation + * image. + * + * If the restore fails after this function has been called, + * platform_restore_cleanup() must be called. + */ +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - Switch to the working state after failing restore. + * @platform_mode: Whether or not to use the platform driver. + * + * Use the platform driver to switch the system to the normal mode of operation + * after a failing restore. + * + * If platform_pre_restore() has been called before the failing restore, this + * function must be called too, regardless of the result of + * platform_pre_restore(). + */ +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + +/** + * platform_recover - Recover from a failure to suspend devices. + * @platform_mode: Whether or not to use the platform driver. + */ +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + +/** + * swsusp_show_speed - Print time elapsed between two events during hibernation. + * @start: Starting event. + * @stop: Final event. + * @nr_pages: Number of memory pages processed between @start and @stop. + * @msg: Additional diagnostic message to print. + */ +void swsusp_show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) +{ + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; + + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk(KERN_CRIT "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", + msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); +} +#ifdef CONFIG_WMT_KILLER +#include <linux/oom.h> + +#define OOM_SCORE_ADJ_MAX 1000 + +static unsigned long wmt_deathpending_timeout; + +/*Keywords of Excluded Process Name*/ +static const char keywords[][64] = { + ".launcher", + ".mkpro", + "" +}; +/* +Name: wmt_in_list() +Desc: list contains some keywords. check if a specific name contains keyword that on the list. +Param: + keyword_list: the list contains keywords + name: the name to be checked +Return: + if the specific name contains keywords in the list, + return true, otherwise return false. +*/ +static bool wmt_in_list(const char keyword_list[][64], const char *name) +{ + int i = 0; + char *sub_str = NULL; + //printk("checking name: %s\n", name); + while (strcmp(keyword_list[i], "") != 0) { + //printk("matching keyword:%s\n", keyword_list[i]); + sub_str = strstr(name, keyword_list[i]); + + if (sub_str) { + //printk("matched:%s\n", keyword_list[i]); + return true; + } + i++; + } + return false; +} + +static void wmt_killer(void) +{ + struct task_struct *tsk; + struct task_struct *selected = NULL; + int rem = 0; + int tasksize; + int min_score_adj = 1;//OOM_SCORE_ADJ_MAX>>6; + int selected_tasksize = 0; + int selected_oom_score_adj; + + rem = global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_INACTIVE_FILE); + printk("wmt_killer: min_score_adj %d, rem %d\n", + min_score_adj,rem); + + selected_oom_score_adj = min_score_adj; + + rcu_read_lock(); + for_each_process(tsk) { + struct task_struct *p; + int oom_score_adj; + + if (tsk->flags & PF_KTHREAD) + continue; + + p = find_lock_task_mm(tsk); + if (!p) + continue; + + if (test_tsk_thread_flag(p, TIF_MEMDIE) && + time_before_eq(jiffies, wmt_deathpending_timeout)) { + task_unlock(p); + rcu_read_unlock(); + return; + } + oom_score_adj = p->signal->oom_score_adj; + if (oom_score_adj < min_score_adj) { + task_unlock(p); + continue; + } + tasksize = get_mm_rss(p->mm); + task_unlock(p); + if (tasksize <= 0) + continue; +#if 0 + if (selected) { + if (oom_score_adj < selected_oom_score_adj) + continue; + if (oom_score_adj == selected_oom_score_adj && + tasksize <= selected_tasksize) + continue; + } +#endif + selected = p; + selected_tasksize = tasksize; + selected_oom_score_adj = oom_score_adj; + printk("wmt_killer: select %d (%s), adj %d, size %d, to kill\n", + p->pid, p->comm, oom_score_adj, tasksize); + +#if 1 /*check process name first.*/ + if(wmt_in_list(keywords, p->comm)){ + printk("[wmt_killer]: Exclude Process :%s\n", p->comm); + selected = NULL; + } +#endif + if (selected) { + printk("wmt_killer: send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_score_adj, selected_tasksize); + wmt_deathpending_timeout = jiffies + HZ; + send_sig(SIGKILL, selected, 0); + set_tsk_thread_flag(selected, TIF_MEMDIE); + rem -= selected_tasksize; + } + } + rcu_read_unlock(); + printk("wmt_killer: rem = %d\n", rem); +} + +#else +static void wmt_killer(void){ +} + +#endif + +/** + * create_image - Create a hibernation image. + * @platform_mode: Whether or not to use the platform driver. + * + * Execute device drivers' "late" and "noirq" freeze callbacks, create a + * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. + * + * Control reappears in this routine after the subsequent restore. + */ +static int create_image(int platform_mode) +{ + int error; + unsigned int pmc_temp; + + error = dpm_suspend_end(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + return error; + } + + error = platform_pre_snapshot(platform_mode); + if (error || hibernation_test(TEST_PLATFORM)) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error || hibernation_test(TEST_CPUS)) + goto Enable_cpus; + + local_irq_disable(); + + error = syscore_suspend(); + if (error) { + printk(KERN_ERR "PM: Some system devices failed to power down, " + "aborting hibernation\n"); + goto Enable_irqs; + } + + pmc_temp = PMWS_VAL; + pmc_temp &= (WK_TRG_EN_VAL | 0x4000); + PMWS_VAL = PMWS_VAL; + if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) + goto Power_up; + in_suspend = 1; + save_processor_state(); + error = swsusp_arch_suspend(); + if (error) + printk(KERN_ERR "PM: Error %d creating hibernation image\n", + error); + /* Restore control flow magically appears here */ + restore_processor_state(); +#if 1 + /*check if nosave addresses are correct.*/ + if(!in_suspend){ + printk(KERN_CRIT "PM: Image Restored, Resuming...\n"); + printk("__nosave_backup_phys=0x%x\n",__nosave_backup_phys); + printk("__nosave_begin_phys=0x%x\n",__nosave_begin_phys); + printk("__nosave_end_phys=0x%x\n",__nosave_end_phys); + } +#endif + if (!in_suspend) { + events_check_enabled = false; + //platform_leave(platform_mode); + } + /* + do platform_leave() for both in_suspend = 0 or 1. + When in_suspend=1, + If we need to abort the procedure, + platform_leave() will not be called unless we do it here. + Without calling this function, some devices are not be resumed + and can not functional properly. + */ + platform_leave(platform_mode); + + Power_up: + syscore_resume(); + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_finish: + platform_finish(platform_mode); + + dpm_resume_start(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + + return error; +} + +/** + * hibernation_snapshot - Quiesce devices and create a hibernation image. + * @platform_mode: If set, use platform driver to prepare for the transition. + * + * This routine must be called with pm_mutex held. + */ +int hibernation_snapshot(int platform_mode) +{ + pm_message_t msg; + int error; + + error = platform_begin(platform_mode); + if (error) + goto Close; + + /* Preallocate image memory before shutting down devices. */ + error = hibernate_preallocate_memory(); + if (error) { + mutex_lock(&wmt_lock); + mutex_unlock(&wmt_lock); + goto Close; + } + mutex_lock(&wmt_lock); + error = freeze_kernel_threads(); + if (error) + goto Cleanup; + + if (hibernation_test(TEST_FREEZER)) { + + /* + * Indicate to the caller that we are returning due to a + * successful freezer test. + */ + freezer_test_done = true; + goto Thaw; + } + + error = dpm_prepare(PMSG_FREEZE); + if (error) { + dpm_complete(PMSG_RECOVER); + goto Thaw; + } + + suspend_console(); + pm_restrict_gfp_mask(); + + error = dpm_suspend(PMSG_FREEZE); + + if (error || hibernation_test(TEST_DEVICES)) + platform_recover(platform_mode); + else + error = create_image(platform_mode); + mutex_unlock(&wmt_lock); + /* + * In the case that we call create_image() above, the control + * returns here (1) after the image has been created or the + * image creation has failed and (2) after a successful restore. + */ + + /* We may need to release the preallocated image pages here. */ + if (error || !in_suspend) + swsusp_free(); + + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); + + if (error || !in_suspend) + pm_restore_gfp_mask(); + + resume_console(); + dpm_complete(msg); + + Close: + platform_end(platform_mode); + return error; + + Thaw: + thaw_kernel_threads(); + Cleanup: + swsusp_free(); + goto Close; +} + +/** + * resume_target_kernel - Restore system state from a hibernation image. + * @platform_mode: Whether or not to use the platform driver. + * + * Execute device drivers' "noirq" and "late" freeze callbacks, restore the + * contents of highmem that have not been restored yet from the image and run + * the low-level code that will restore the remaining contents of memory and + * switch to the just restored target kernel. + */ +static int resume_target_kernel(bool platform_mode) +{ + int error; + + error = dpm_suspend_end(PMSG_QUIESCE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting resume\n"); + return error; + } + + error = platform_pre_restore(platform_mode); + if (error) + goto Cleanup; + + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + + local_irq_disable(); + + error = syscore_suspend(); + if (error) + goto Enable_irqs; + + save_processor_state(); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* + * The code below is only ever reached in case of a failure. + * Otherwise, execution continues at the place where + * swsusp_arch_suspend() was called. + */ + BUG_ON(!error); + /* + * This call to restore_highmem() reverts the changes made by + * the previous one. + */ + restore_highmem(); + } + /* + * The only reason why swsusp_arch_resume() can fail is memory being + * very tight, so we have to free it as soon as we can to avoid + * subsequent failures. + */ + swsusp_free(); + restore_processor_state(); + touch_softlockup_watchdog(); + + syscore_resume(); + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Cleanup: + platform_restore_cleanup(platform_mode); + + dpm_resume_start(PMSG_RECOVER); + + return error; +} + +/** + * hibernation_restore - Quiesce devices and restore from a hibernation image. + * @platform_mode: If set, use platform driver to prepare for the transition. + * + * This routine must be called with pm_mutex held. If it is successful, control + * reappears in the restored target kernel in hibernation_snapshot(). + */ +int hibernation_restore(int platform_mode) +{ + int error; + + pm_prepare_console(); + suspend_console(); + pm_restrict_gfp_mask(); + error = dpm_suspend_start(PMSG_QUIESCE); + if (!error) { + error = resume_target_kernel(platform_mode); + dpm_resume_end(PMSG_RECOVER); + } + pm_restore_gfp_mask(); + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - Power off the system using the platform driver. + */ +int hibernation_platform_enter(void) +{ + int error; + + if (!hibernation_ops) + return -ENOSYS; + + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we should let + * the firmware know that we're going to enter the sleep state after all + */ + error = hibernation_ops->begin(); + if (error) + goto Close; + + entering_platform_hibernation = true; + suspend_console(); + error = dpm_suspend_start(PMSG_HIBERNATE); + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } + + error = dpm_suspend_end(PMSG_HIBERNATE); + if (error) + goto Resume_devices; + + error = hibernation_ops->prepare(); + if (error) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error) + goto Platform_finish; + + local_irq_disable(); + syscore_suspend(); + if (pm_wakeup_pending()) { + error = -EAGAIN; + goto Power_up; + } + + hibernation_ops->enter(); + /* We should never get here */ + while (1); + + Power_up: + syscore_resume(); + local_irq_enable(); + enable_nonboot_cpus(); + + Platform_finish: + hibernation_ops->finish(); + + dpm_resume_start(PMSG_RESTORE); + + Resume_devices: + entering_platform_hibernation = false; + dpm_resume_end(PMSG_RESTORE); + resume_console(); + + Close: + hibernation_ops->end(); + + return error; +} + +/** + * power_down - Shut the machine down for hibernation. + * + * Use the platform driver, if configured, to put the system into the sleep + * state corresponding to hibernation, or try to power it off or reboot, + * depending on the value of hibernation_mode. + */ +static void power_down(void) +{ + switch (hibernation_mode) { + case HIBERNATION_REBOOT: + kernel_restart(NULL); + break; + case HIBERNATION_PLATFORM: + hibernation_platform_enter(); + case HIBERNATION_SHUTDOWN: + kernel_power_off(); + break; + } + kernel_halt(); + /* + * Valid image is on the disk, if we continue we risk serious data + * corruption after resume. + */ + printk(KERN_CRIT "PM: Please power down manually\n"); + while(1); +} + +/** + * hibernate - Carry out system hibernation, including saving the image. + */ +int hibernate(void) +{ + int error; + char std_env_val[20] = "0"; + int varlen = 5; + unsigned int std; + + lock_system_sleep(); + + error = swsusp_swap_check(); + if (error) { + if (error != -ENOSPC) + printk(KERN_ERR "PM: Cannot find swap device, try " + "swapon -a.\n"); + goto Unlock; + } + + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; + /*Try to open swap function*/ + wmt_swap = wmt_setswap(); + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Exit; + + printk(KERN_CRIT "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + wmt_killer();//kill process to save memory before freeze_process. + + error = freeze_processes(); + if (error) + goto Free_bitmaps; + + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (error || freezer_test_done) { + pm_notifier_call_chain(PM_HIBERNATION_FINISH); + goto Thaw; + } + if (in_suspend) { + unsigned int flags = 0; + + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; + if (nocompress) + flags |= SF_NOCOMPRESS_MODE; + else + flags |= SF_CRC32_MODE; + + printk(KERN_CRIT"PM: writing image.\n"); + error = swsusp_write(flags); + pm_notifier_call_chain(PM_HIBERNATION_FINISH); + swsusp_free(); + if (!error) + power_down(); + in_suspend = 0; + pm_restore_gfp_mask(); + } else { + mutex_unlock(&wmt_lock); + printk(KERN_CRIT "PM: Image restored successfully.\n"); + printk(KERN_CRIT "PM: STD Stress Test: %d times\n",++stress_resume_times); + printk(KERN_CRIT "Resume to Android\n"); + if (wmt_getsyspara("wmt.std.param",std_env_val,&varlen) == 0) { + sscanf(std_env_val,"%X",&std); + if (std & 0x1) + wmt_resume_notify(); + } + } + + Thaw: + thaw_processes(); + + /* Don't bother checking whether freezer_test_done is true */ + freezer_test_done = false; + + Free_bitmaps: + free_basic_memory_bitmaps(); + Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + Unlock: + unlock_system_sleep(); + + wmt_swap = 1; + return error; +} + + +/** + * software_resume - Resume from a saved hibernation image. + * + * This routine is called as a late initcall, when all devices have been + * discovered and initialized already. + * + * The image reading code is called to see if there is a hibernation image + * available for reading. If that is the case, devices are quiesced and the + * contents of memory is restored from the saved image. + * + * If this is successful, control reappears in the restored target kernel in + * hibernation_snaphot() which returns to hibernate(). Otherwise, the routine + * attempts to recover gracefully and make the kernel return to the normal mode + * of operation. + */ +static int software_resume(void) +{ + int error; + unsigned int flags; + int retry_wait; + + /* + Set resume_wait to 1 is essential when using mmc device as std partition. + Note that kernel will wait for it until it is present. + */ + resume_wait = 1; + /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + + /* + * name_to_dev_t() below takes a sysfs buffer mutex when sysfs + * is configured into the kernel. Since the regular hibernate + * trigger path is via sysfs which takes a buffer mutex before + * calling hibernate functions (which take pm_mutex) this can + * cause lockdep to complain about a possible ABBA deadlock + * which cannot happen since we're in the boot code here and + * sysfs can't be invoked yet. Therefore, we use a subclass + * here to avoid lockdep complaining. + */ + mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); + + if (swsusp_resume_device) + goto Check_image; + + if (!strlen(resume_file)) { + error = -ENOENT; + goto Unlock; + } + + printk(KERN_CRIT"PM: Checking hibernation image partition %s\n", resume_file); + + if (resume_delay) { + printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + resume_delay); + ssleep(resume_delay); + } + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); + + /* + * name_to_dev_t is ineffective to verify parition if resume_file is in + * integer format. (e.g. major:minor) + */ + if (isdigit(resume_file[0]) && resume_wait) { + int partno; + while (!get_gendisk(swsusp_resume_device, &partno)) + msleep(10); + } + + if (!swsusp_resume_device) { + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + printk(KERN_CRIT"\n resume device not present.\n"); + wait_for_device_probe(); + + if (resume_wait) { +#if 0 + while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) + msleep(10); +#else + for(retry_wait=100; retry_wait>0; retry_wait--){ + if((swsusp_resume_device = name_to_dev_t(resume_file)) == 0){ + msleep(10); + } + } +#endif + async_synchronize_full(); + } + + /* + * We can't depend on SCSI devices being available after loading + * one of their modules until scsi_complete_async_scans() is + * called and the resume device usually is a SCSI one. + */ + scsi_complete_async_scans(); + + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) { + error = -ENODEV; + goto Unlock; + } + } + + Check_image: + printk(KERN_CRIT"PM: Hibernation image partition %d:%d present\n", + MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); + + printk(KERN_CRIT"PM: Looking for hibernation image.\n"); + error = swsusp_check(); + if (error) + goto Unlock; + + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + swsusp_close(FMODE_READ); + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + goto close_finish; + + error = create_basic_memory_bitmaps(); + if (error) + goto close_finish; + + printk(KERN_CRIT"PM: Preparing processes for restore.\n"); + error = freeze_processes(); + if (error) { + swsusp_close(FMODE_READ); + goto Done; + } + + printk(KERN_CRIT"PM: Loading hibernation image.\n"); + + error = swsusp_read(&flags); + swsusp_close(FMODE_READ); + if (!error) + hibernation_restore(flags & SF_PLATFORM_MODE); + + printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + swsusp_free(); + thaw_processes(); + Done: + free_basic_memory_bitmaps(); + Finish: + pm_notifier_call_chain(PM_POST_RESTORE); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + /* For success case, the suspend path will release the lock */ + Unlock: + mutex_unlock(&pm_mutex); + printk(KERN_CRIT"PM: Hibernation image not present or could not be loaded.\n"); + return error; +close_finish: + swsusp_close(FMODE_READ); + goto Finish; +} + +late_initcall(software_resume); + + +static const char * const hibernation_modes[] = { + [HIBERNATION_PLATFORM] = "platform", + [HIBERNATION_SHUTDOWN] = "shutdown", + [HIBERNATION_REBOOT] = "reboot", +}; + +/* + * /sys/power/disk - Control hibernation mode. + * + * Hibernation can be handled in several ways. There are a few different ways + * to put the system into the sleep state: using the platform driver (e.g. ACPI + * or other hibernation_ops), powering it off or rebooting it (for testing + * mostly). + * + * The sysfs file /sys/power/disk provides an interface for selecting the + * hibernation mode to use. Reading from this file causes the available modes + * to be printed. There are 3 modes that can be supported: + * + * 'platform' + * 'shutdown' + * 'reboot' + * + * If a platform hibernation driver is in use, 'platform' will be supported + * and will be used by default. Otherwise, 'shutdown' will be used by default. + * The selected option (i.e. the one corresponding to the current value of + * hibernation_mode) is enclosed by a square bracket. + * + * To select a given hibernation mode it is necessary to write the mode's + * string representation (as returned by reading from /sys/power/disk) back + * into /sys/power/disk. + */ + +static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int i; + char *start = buf; + + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (!hibernation_modes[i]) + continue; + switch (i) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == hibernation_mode) + buf += sprintf(buf, "[%s] ", hibernation_modes[i]); + else + buf += sprintf(buf, "%s ", hibernation_modes[i]); + } + buf += sprintf(buf, "\n"); + return buf-start; +} + +static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = 0; + int i; + int len; + char *p; + int mode = HIBERNATION_INVALID; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + lock_system_sleep(); + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (len == strlen(hibernation_modes[i]) + && !strncmp(buf, hibernation_modes[i], len)) { + mode = i; + break; + } + } + if (mode != HIBERNATION_INVALID) { + switch (mode) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + hibernation_mode = mode; + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + hibernation_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + if (!error) + printk(KERN_CRIT"PM: Hibernation mode set to '%s'\n", + hibernation_modes[mode]); + unlock_system_sleep(); + return error ? error : n; +} + +power_attr(disk); + +static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + MINOR(swsusp_resume_device)); +} + +static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int maj, min; + dev_t res; + int ret = -EINVAL; + + if (sscanf(buf, "%u:%u", &maj, &min) != 2) + goto out; + + res = MKDEV(maj,min); + if (maj != MAJOR(res) || min != MINOR(res)) + goto out; + + lock_system_sleep(); + swsusp_resume_device = res; + unlock_system_sleep(); + printk(KERN_INFO "PM: Starting manual resume from disk\n"); + noresume = 0; + software_resume(); + ret = n; + out: + return ret; +} + +power_attr(resume); + +static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", image_size); +} + +static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + image_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(image_size); + +static ssize_t reserved_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", reserved_size); +} + +static ssize_t reserved_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + reserved_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(reserved_size); + +static struct attribute * g[] = { + &disk_attr.attr, + &resume_attr.attr, + &image_size_attr.attr, + &reserved_size_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(power_kobj, &attr_group); +} + +core_initcall(pm_disk_init); + + +static int __init resume_setup(char *str) +{ + if (noresume) + return 1; + + strncpy( resume_file, str, 255 ); + return 1; +} + +static int __init resume_offset_setup(char *str) +{ + unsigned long long offset; + + if (noresume) + return 1; + + if (sscanf(str, "%llu", &offset) == 1) + swsusp_resume_block = offset; + + return 1; +} + +static int __init hibernate_setup(char *str) +{ + if (!strncmp(str, "noresume", 8)) + noresume = 1; + else if (!strncmp(str, "nocompress", 10)) + nocompress = 1; + return 1; +} + +static int __init noresume_setup(char *str) +{ + noresume = 1; + return 1; +} + +static int __init resumewait_setup(char *str) +{ + resume_wait = 1; + return 1; +} + +static int __init resumedelay_setup(char *str) +{ + resume_delay = simple_strtoul(str, NULL, 0); + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume_offset=", resume_offset_setup); +__setup("resume=", resume_setup); +__setup("hibernate=", hibernate_setup); +__setup("resumewait", resumewait_setup); +__setup("resumedelay=", resumedelay_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c new file mode 100644 index 00000000..bd594ff7 --- /dev/null +++ b/kernel/power/main.c @@ -0,0 +1,661 @@ +/* + * kernel/power/main.c - PM subsystem core functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is released under the GPLv2 + * + */ + +#include <linux/module.h> +#include <linux/export.h> +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/resume-trace.h> +#include <linux/workqueue.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/reboot.h> +#include "power.h" + +DEFINE_MUTEX(pm_mutex); + +#ifdef CONFIG_PM_SLEEP + +/* Routines for PM-transition notifications */ + +static BLOCKING_NOTIFIER_HEAD(pm_chain_head); + +int register_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&pm_chain_head, nb); +} +EXPORT_SYMBOL_GPL(register_pm_notifier); + +int unregister_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&pm_chain_head, nb); +} +EXPORT_SYMBOL_GPL(unregister_pm_notifier); + +int pm_notifier_call_chain(unsigned long val) +{ + int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); + + return notifier_to_errno(ret); +} + +/* If set, devices may be suspended and resumed asynchronously. */ +int pm_async_enabled = 1; + +static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", pm_async_enabled); +} + +static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + pm_async_enabled = val; + return n; +} + +power_attr(pm_async); + +#ifdef CONFIG_PM_DEBUG +int pm_test_level = TEST_NONE; + +static const char * const pm_tests[__TEST_AFTER_LAST] = { + [TEST_NONE] = "none", + [TEST_CORE] = "core", + [TEST_CPUS] = "processors", + [TEST_PLATFORM] = "platform", + [TEST_DEVICES] = "devices", + [TEST_FREEZER] = "freezer", +}; + +static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + char *s = buf; + int level; + + for (level = TEST_FIRST; level <= TEST_MAX; level++) + if (pm_tests[level]) { + if (level == pm_test_level) + s += sprintf(s, "[%s] ", pm_tests[level]); + else + s += sprintf(s, "%s ", pm_tests[level]); + } + + if (s != buf) + /* convert the last space to a newline */ + *(s-1) = '\n'; + + return (s - buf); +} + +static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + const char * const *s; + int level; + char *p; + int len; + int error = -EINVAL; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + lock_system_sleep(); + + level = TEST_FIRST; + for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { + pm_test_level = level; + error = 0; + break; + } + + unlock_system_sleep(); + + return error ? error : n; +} + +power_attr(pm_test); +#endif /* CONFIG_PM_DEBUG */ + +#ifdef CONFIG_DEBUG_FS +static char *suspend_step_name(enum suspend_stat_step step) +{ + switch (step) { + case SUSPEND_FREEZE: + return "freeze"; + case SUSPEND_PREPARE: + return "prepare"; + case SUSPEND_SUSPEND: + return "suspend"; + case SUSPEND_SUSPEND_NOIRQ: + return "suspend_noirq"; + case SUSPEND_RESUME_NOIRQ: + return "resume_noirq"; + case SUSPEND_RESUME: + return "resume"; + default: + return ""; + } +} + +static int suspend_stats_show(struct seq_file *s, void *unused) +{ + int i, index, last_dev, last_errno, last_step; + + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; + last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; + last_errno %= REC_FAILED_NUM; + last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; + last_step %= REC_FAILED_NUM; + seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n" + "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n", + "success", suspend_stats.success, + "fail", suspend_stats.fail, + "failed_freeze", suspend_stats.failed_freeze, + "failed_prepare", suspend_stats.failed_prepare, + "failed_suspend", suspend_stats.failed_suspend, + "failed_suspend_late", + suspend_stats.failed_suspend_late, + "failed_suspend_noirq", + suspend_stats.failed_suspend_noirq, + "failed_resume", suspend_stats.failed_resume, + "failed_resume_early", + suspend_stats.failed_resume_early, + "failed_resume_noirq", + suspend_stats.failed_resume_noirq); + seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", + suspend_stats.failed_devs[last_dev]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_dev + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_stats.failed_devs[index]); + } + seq_printf(s, " last_failed_errno:\t%-d\n", + suspend_stats.errno[last_errno]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_errno + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-d\n", + suspend_stats.errno[index]); + } + seq_printf(s, " last_failed_step:\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[last_step])); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_step + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[index])); + } + + return 0; +} + +static int suspend_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_stats_show, NULL); +} + +static const struct file_operations suspend_stats_operations = { + .open = suspend_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init pm_debugfs_init(void) +{ + debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, + NULL, NULL, &suspend_stats_operations); + return 0; +} + +late_initcall(pm_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + +#endif /* CONFIG_PM_SLEEP */ + +struct kobject *power_kobj; + +/** + * state - control system power state. + * + * show() returns what states are supported, which is hard-coded to + * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and + * 'disk' (Suspend-to-Disk). + * + * store() accepts one of those strings, translates it into the + * proper enumerated value, and initiates a suspend transition. + */ +static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + char *s = buf; +#ifdef CONFIG_SUSPEND + int i; + + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i] && valid_state(i)) + s += sprintf(s,"%s ", pm_states[i]); + } +#endif +#ifdef CONFIG_HIBERNATION + s += sprintf(s, "%s\n", "disk"); +#else + if (s != buf) + /* convert the last space to a newline */ + *(s-1) = '\n'; +#endif + return (s - buf); +} + +static int run_pre_suspend(void) +{ + int ret; + char *argv[] = { "/system/etc/wmt/pm.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "ACTION=pre_suspend", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +static int run_post_resume(void) +{ + int ret; + char *argv[] = { "/system/etc/wmt/pm.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "ACTION=post_resume", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +static int run_hibernate_stress_ramdisk(void) +{ + int ret; + char *argv[] = { "/etc/wmt/scripts/std_stress_test.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +static int run_hibernate_stress_android(void) +{ + int ret; + char *argv[] = { "/system/etc/wmt/script/std_stress_test.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +static suspend_state_t decode_state(const char *buf, size_t n) +{ +#ifdef CONFIG_SUSPEND + suspend_state_t state = PM_SUSPEND_STANDBY; + const char * const *s; +#endif + char *p; + int len; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + /* Check hibernation first. */ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; + +#ifdef CONFIG_SUSPEND + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; +#endif + + return PM_SUSPEND_ON; +} + +int std_stress_test;//std_stress_test = wmt.std.stress +extern int wmt_getsyspara(char *varname, unsigned char *varval, int *varlen); + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + int ret = 0; + unsigned char buf_env[80];//for std_stress + int varlen = sizeof(buf_env);//for std_stress + static int retry = STD_RETRY_TIMES; + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) { + if (state == PM_SUSPEND_MEM){ + run_pre_suspend(); + } else if(state == PM_SUSPEND_ON) { + run_post_resume(); + } + error = pm_suspend(state); + } + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); + if( state == PM_SUSPEND_MAX && error + && error!=-STD_USER_ABORT + ){ + printk(KERN_CRIT "\nHibernate Failed.\n"); + if(retry > 0) + ret = run_hibernate_stress_android(); + printk(KERN_CRIT"run script : ret=%d\n", ret); + if(ret || !retry){ + printk(KERN_CRIT "\n Hibernation Failed. Power OFF.\n"); + kernel_power_off(); + } + retry--; + } + + if(!error && state == PM_SUSPEND_MAX){ + retry = STD_RETRY_TIMES; /*Reset retry times after STD Restore.*/ + + /*hibernation stress test */ + std_stress_test = 0; + ret = wmt_getsyspara("wmt.std.stress", buf_env, &varlen); + if (ret == 0) + sscanf(buf_env, "%d", &std_stress_test); + if(std_stress_test) { + printk(KERN_CRIT "\n Runing STD Stress Test \n"); + ret = run_hibernate_stress_ramdisk();//if success, ret=0. + if(ret){//fail. + printk(KERN_CRIT "\n Under Android.\n"); + run_hibernate_stress_android(); + } + } + } + + return error ? error : n; +} + +power_attr(state); + +#ifdef CONFIG_PM_SLEEP +/* + * The 'wakeup_count' attribute, along with the functions defined in + * drivers/base/power/wakeup.c, provides a means by which wakeup events can be + * handled in a non-racy way. + * + * If a wakeup event occurs when the system is in a sleep state, it simply is + * woken up. In turn, if an event that would wake the system up from a sleep + * state occurs when it is undergoing a transition to that sleep state, the + * transition should be aborted. Moreover, if such an event occurs when the + * system is in the working state, an attempt to start a transition to the + * given sleep state should fail during certain period after the detection of + * the event. Using the 'state' attribute alone is not sufficient to satisfy + * these requirements, because a wakeup event may occur exactly when 'state' + * is being written to and may be delivered to user space right before it is + * frozen, so the event will remain only partially processed until the system is + * woken up by another event. In particular, it won't cause the transition to + * a sleep state to be aborted. + * + * This difficulty may be overcome if user space uses 'wakeup_count' before + * writing to 'state'. It first should read from 'wakeup_count' and store + * the read value. Then, after carrying out its own preparations for the system + * transition to a sleep state, it should write the stored value to + * 'wakeup_count'. If that fails, at least one wakeup event has occurred since + * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it + * is allowed to write to 'state', but the transition will be aborted if there + * are any wakeup events detected after 'wakeup_count' was written to. + */ + +static ssize_t wakeup_count_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned int val; + + return pm_get_wakeup_count(&val, true) ? + sprintf(buf, "%u\n", val) : -EINTR; +} + +static ssize_t wakeup_count_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int val; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + error = -EINVAL; + if (sscanf(buf, "%u", &val) == 1) { + if (pm_save_wakeup_count(val)) + error = n; + } + + out: + pm_autosleep_unlock(); + return error; +} + +power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && strcmp(buf, "off") && strcmp(buf, "off\n")) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS +static ssize_t wake_lock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, true); +} + +static ssize_t wake_lock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_lock(buf); + return error ? error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static ssize_t pm_trace_dev_match_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return show_trace_dev_match(buf, PAGE_SIZE); +} + +static ssize_t +pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + return -EINVAL; +} + +power_attr(pm_trace_dev_match); + +#endif /* CONFIG_PM_TRACE */ + +static struct attribute * g[] = { + &state_attr.attr, +#ifdef CONFIG_PM_TRACE + &pm_trace_attr.attr, + &pm_trace_dev_match_attr.attr, +#endif +#ifdef CONFIG_PM_SLEEP + &pm_async_attr.attr, + &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif +#ifdef CONFIG_PM_DEBUG + &pm_test_attr.attr, +#endif +#endif + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + +#ifdef CONFIG_PM_RUNTIME +struct workqueue_struct *pm_wq; +EXPORT_SYMBOL_GPL(pm_wq); + +static int __init pm_start_workqueue(void) +{ + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); + + return pm_wq ? 0 : -ENOMEM; +} +#else +static inline int pm_start_workqueue(void) { return 0; } +#endif + +static int __init pm_init(void) +{ + int error = pm_start_workqueue(); + if (error) + return error; + hibernate_image_size_init(); + hibernate_reserved_size_init(); + power_kobj = kobject_create_and_add("power", NULL); + if (!power_kobj) + return -ENOMEM; + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); +} + +core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h new file mode 100644 index 00000000..716541bc --- /dev/null +++ b/kernel/power/power.h @@ -0,0 +1,310 @@ +#include <linux/suspend.h> +#include <linux/suspend_ioctls.h> +#include <linux/utsname.h> +#include <linux/freezer.h> + +/*FIXME: currently PAGE CRC can not support highmem !!!*/ +#define _PRINT_PAGE_CRC_ 1//also need to set in arch/arm/kernel/hibernate.c +#undef _PRINT_PAGE_CRC_ + +#ifdef _PRINT_PAGE_CRC_ +#include <linux/crc32.h>//for page crc +#endif + +#define STD_USER_ABORT 0xA5 +#define STD_RETRY_TIMES (2) + +struct swsusp_info { + struct new_utsname uts; + u32 version_code; + unsigned long num_physpages; + int cpus; + unsigned long image_pages; + unsigned long pages; + unsigned long size; + char archdata[1024]; +} __attribute__((aligned(PAGE_SIZE))); + +#ifdef CONFIG_HIBERNATION +/* kernel/power/snapshot.c */ +extern void __init hibernate_reserved_size_init(void); +extern void __init hibernate_image_size_init(void); + +#ifdef CONFIG_ARCH_HIBERNATION_HEADER +/* Maximum size of architecture specific data in a hibernation header */ +#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4) + +extern int arch_hibernation_header_save(void *addr, unsigned int max_size); +extern int arch_hibernation_header_restore(void *addr); + +static inline int init_header_complete(struct swsusp_info *info) +{ + return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE); +} + +static inline char *check_image_kernel(struct swsusp_info *info) +{ + return arch_hibernation_header_restore(info) ? + "architecture specific data" : NULL; +} +#endif /* CONFIG_ARCH_HIBERNATION_HEADER */ + +extern void __weak swsusp_arch_add_info(char *archdata, size_t size); +extern int swsusp_swap_check(void); +/* + * Keep some memory free so that I/O operations can succeed without paging + * [Might this be more than 4 MB?] + */ +#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) + +/* + * Keep 1 MB of memory free so that device drivers can allocate some pages in + * their .suspend() routines without breaking the suspend to disk. + */ +#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) + +/* kernel/power/hibernate.c */ +extern bool freezer_test_done; + +extern int hibernation_snapshot(int platform_mode); +extern int hibernation_restore(int platform_mode); +extern int hibernation_platform_enter(void); + +#else /* !CONFIG_HIBERNATION */ + +static inline void hibernate_reserved_size_init(void) {} +static inline void hibernate_image_size_init(void) {} +#endif /* !CONFIG_HIBERNATION */ + +extern int pfn_is_nosave(unsigned long); + +#define power_attr(_name) \ +static struct kobj_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +/* Preferred image size in bytes (default 500 MB) */ +extern unsigned long image_size; +/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ +extern unsigned long reserved_size; +extern int in_suspend; +extern dev_t swsusp_resume_device; +extern sector_t swsusp_resume_block; + +extern asmlinkage int swsusp_arch_suspend(void); +extern asmlinkage int swsusp_arch_resume(void); + +extern int create_basic_memory_bitmaps(void); +extern void free_basic_memory_bitmaps(void); +extern int hibernate_preallocate_memory(void); + +/** + * Auxiliary structure used for reading the snapshot image data and + * metadata from and writing them to the list of page backup entries + * (PBEs) which is the main data structure of swsusp. + * + * Using struct snapshot_handle we can transfer the image, including its + * metadata, as a continuous sequence of bytes with the help of + * snapshot_read_next() and snapshot_write_next(). + * + * The code that writes the image to a storage or transfers it to + * the user land is required to use snapshot_read_next() for this + * purpose and it should not make any assumptions regarding the internal + * structure of the image. Similarly, the code that reads the image from + * a storage or transfers it from the user land is required to use + * snapshot_write_next(). + * + * This may allow us to change the internal structure of the image + * in the future with considerably less effort. + */ + +struct snapshot_handle { + unsigned int cur; /* number of the block of PAGE_SIZE bytes the + * next operation will refer to (ie. current) + */ + void *buffer; /* address of the block to read from + * or write to + */ + int sync_read; /* Set to one to notify the caller of + * snapshot_write_next() that it may + * need to call wait_on_bio_chain() + */ +}; + +/* This macro returns the address from/to which the caller of + * snapshot_read_next()/snapshot_write_next() is allowed to + * read/write data after the function returns + */ +#define data_of(handle) ((handle).buffer) + +extern unsigned int snapshot_additional_pages(struct zone *zone); +extern unsigned long snapshot_get_image_size(void); +extern int snapshot_read_next(struct snapshot_handle *handle); +extern int snapshot_write_next(struct snapshot_handle *handle); +extern void snapshot_write_finalize(struct snapshot_handle *handle); +extern int snapshot_image_loaded(struct snapshot_handle *handle); + +/* If unset, the snapshot device cannot be open. */ +extern atomic_t snapshot_device_available; + +extern sector_t alloc_swapdev_block(int swap); +extern void free_all_swap_pages(int swap); +extern int swsusp_swap_in_use(void); + +/* + * Flags that can be passed from the hibernatig hernel to the "boot" kernel in + * the image header. + */ +#define SF_PLATFORM_MODE 1 +#define SF_NOCOMPRESS_MODE 2 +#define SF_CRC32_MODE 4 + +/* kernel/power/hibernate.c */ +extern int swsusp_check(void); +extern void swsusp_free(void); +extern int swsusp_read(unsigned int *flags_p); +extern int swsusp_write(unsigned int flags); +extern void swsusp_close(fmode_t); + +/* kernel/power/block_io.c */ +extern struct block_device *hib_resume_bdev; + +extern int hib_bio_read_page(pgoff_t page_off, void *addr, + struct bio **bio_chain); +extern int hib_bio_write_page(pgoff_t page_off, void *addr, + struct bio **bio_chain); +extern int hib_wait_on_bio_chain(struct bio **bio_chain); + +struct timeval; +/* kernel/power/swsusp.c */ +extern void swsusp_show_speed(struct timeval *, struct timeval *, + unsigned int, char *); + +/*kernel/power/snapshot.c*/ +extern void swsusp_free_page(void *buffer); + +#ifdef CONFIG_SUSPEND +/* kernel/power/suspend.c */ +extern const char *const pm_states[]; + +extern bool valid_state(suspend_state_t state); +extern int suspend_devices_and_enter(suspend_state_t state); +#else /* !CONFIG_SUSPEND */ +static inline int suspend_devices_and_enter(suspend_state_t state) +{ + return -ENOSYS; +} +static inline bool valid_state(suspend_state_t state) { return false; } +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_PM_TEST_SUSPEND +/* kernel/power/suspend_test.c */ +extern void suspend_test_start(void); +extern void suspend_test_finish(const char *label); +#else /* !CONFIG_PM_TEST_SUSPEND */ +static inline void suspend_test_start(void) {} +static inline void suspend_test_finish(const char *label) {} +#endif /* !CONFIG_PM_TEST_SUSPEND */ + +#ifdef CONFIG_PM_SLEEP +/* kernel/power/main.c */ +extern int pm_notifier_call_chain(unsigned long val); +#endif + +#ifdef CONFIG_HIGHMEM +int restore_highmem(void); +#else +static inline unsigned int count_highmem_pages(void) { return 0; } +static inline int restore_highmem(void) { return 0; } +#endif + +/* + * Suspend test levels + */ +enum { + /* keep first */ + TEST_NONE, + TEST_CORE, + TEST_CPUS, + TEST_PLATFORM, + TEST_DEVICES, + TEST_FREEZER, + /* keep last */ + __TEST_AFTER_LAST +}; + +#define TEST_FIRST TEST_NONE +#define TEST_MAX (__TEST_AFTER_LAST - 1) + +extern int pm_test_level; + +#ifdef CONFIG_SUSPEND_FREEZER +static inline int suspend_freeze_processes(void) +{ + int error; + + error = freeze_processes(); + /* + * freeze_processes() automatically thaws every task if freezing + * fails. So we need not do anything extra upon error. + */ + if (error) + return error; + + error = freeze_kernel_threads(); + /* + * freeze_kernel_threads() thaws only kernel threads upon freezing + * failure. So we have to thaw the userspace tasks ourselves. + */ + if (error) + thaw_processes(); + + return error; +} + +static inline void suspend_thaw_processes(void) +{ + thaw_processes(); +} +#else +static inline int suspend_freeze_processes(void) +{ + return 0; +} + +static inline void suspend_thaw_processes(void) +{ +} +#endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS + +/* kernel/power/wakelock.c */ +extern ssize_t pm_show_wakelocks(char *buf, bool show_active); +extern int pm_wake_lock(const char *buf); +extern int pm_wake_unlock(const char *buf); + +#endif /* !CONFIG_PM_WAKELOCKS */ diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c new file mode 100644 index 00000000..d5235937 --- /dev/null +++ b/kernel/power/poweroff.c @@ -0,0 +1,46 @@ +/* + * poweroff.c - sysrq handler to gracefully power down machine. + * + * This file is released under the GPL v2 + */ + +#include <linux/kernel.h> +#include <linux/sysrq.h> +#include <linux/init.h> +#include <linux/pm.h> +#include <linux/workqueue.h> +#include <linux/reboot.h> +#include <linux/cpumask.h> + +/* + * When the user hits Sys-Rq o to power down the machine this is the + * callback we use. + */ + +static void do_poweroff(struct work_struct *dummy) +{ + kernel_power_off(); +} + +static DECLARE_WORK(poweroff_work, do_poweroff); + +static void handle_poweroff(int key) +{ + /* run sysrq poweroff on boot cpu */ + schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); +} + +static struct sysrq_key_op sysrq_poweroff_op = { + .handler = handle_poweroff, + .help_msg = "powerOff", + .action_msg = "Power Off", + .enable_mask = SYSRQ_ENABLE_BOOT, +}; + +static int pm_sysrq_init(void) +{ + register_sysrq_key('o', &sysrq_poweroff_op); + return 0; +} + +subsys_initcall(pm_sysrq_init); diff --git a/kernel/power/process.c b/kernel/power/process.c new file mode 100644 index 00000000..19db29f6 --- /dev/null +++ b/kernel/power/process.c @@ -0,0 +1,220 @@ +/* + * drivers/power/process.c - Functions for starting/stopping processes on + * suspend transitions. + * + * Originally from swsusp. + */ + + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/oom.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <linux/syscalls.h> +#include <linux/freezer.h> +#include <linux/delay.h> +#include <linux/workqueue.h> +#include <linux/kmod.h> + +/* + * Timeout for stopping processes + */ +#define TIMEOUT (20 * HZ) + +static int try_to_freeze_tasks(bool user_only) +{ + struct task_struct *g, *p; + unsigned long end_time; + unsigned int todo; + bool wq_busy = false; + struct timeval start, end; + u64 elapsed_csecs64; + unsigned int elapsed_csecs; + bool wakeup = false; + + do_gettimeofday(&start); + + end_time = jiffies + TIMEOUT; + + if (!user_only) + freeze_workqueues_begin(); + + while (true) { + todo = 0; + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (p == current || !freeze_task(p)) + continue; + + /* + * Now that we've done set_freeze_flag, don't + * perturb a task in TASK_STOPPED or TASK_TRACED. + * It is "frozen enough". If the task does wake + * up, it will immediately call try_to_freeze. + * + * Because freeze_task() goes through p's scheduler lock, it's + * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING + * transition can't race with task state testing here. + */ + if (!task_is_stopped_or_traced(p) && + !freezer_should_skip(p)) + todo++; + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + + if (!user_only) { + wq_busy = freeze_workqueues_busy(); + todo += wq_busy; + } + + if (!todo || time_after(jiffies, end_time)) + break; + + if (pm_wakeup_pending()) { + wakeup = true; + break; + } + + /* + * We need to retry, but first give the freezing tasks some + * time to enter the regrigerator. + */ + msleep(10); + } + + do_gettimeofday(&end); + elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); + do_div(elapsed_csecs64, NSEC_PER_SEC / 100); + elapsed_csecs = elapsed_csecs64; + + if (todo) { + printk("\n"); + printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " + "(%d tasks refusing to freeze, wq_busy=%d):\n", + wakeup ? "aborted" : "failed", + elapsed_csecs / 100, elapsed_csecs % 100, + todo - wq_busy, wq_busy); + + if (!wakeup) { + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + } + } else { + printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, + elapsed_csecs % 100); + } + + return todo ? -EBUSY : 0; +} + +/** + * freeze_processes - Signal user space processes to enter the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. + */ +int freeze_processes(void) +{ + int error; + + error = __usermodehelper_disable(UMH_FREEZING); + if (error) + return error; + + if (!pm_freezing) + atomic_inc(&system_freezing_cnt); + + printk("Freezing user space processes ... "); + pm_freezing = true; + error = try_to_freeze_tasks(true); + if (!error) { + printk("done."); + __usermodehelper_set_disable_depth(UMH_DISABLED); + oom_killer_disable(); + } + printk("\n"); + BUG_ON(in_atomic()); + + if (error) + thaw_processes(); + return error; +} + +/** + * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + * + * On success, returns 0. On failure, -errno and only the kernel threads are + * thawed, so as to give a chance to the caller to do additional cleanups + * (if any) before thawing the userspace tasks. So, it is the responsibility + * of the caller to thaw the userspace tasks, when the time is right. + */ +int freeze_kernel_threads(void) +{ + int error; + + printk("Freezing remaining freezable tasks ... "); + pm_nosig_freezing = true; + error = try_to_freeze_tasks(false); + if (!error) + printk("done."); + + printk("\n"); + BUG_ON(in_atomic()); + + if (error) + thaw_kernel_threads(); + return error; +} + +void thaw_processes(void) +{ + struct task_struct *g, *p; + + if (pm_freezing) + atomic_dec(&system_freezing_cnt); + pm_freezing = false; + pm_nosig_freezing = false; + + oom_killer_enable(); + + printk("Restarting tasks ... "); + + thaw_workqueues(); + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + __thaw_task(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + + usermodehelper_enable(); + + schedule(); + printk("done.\n"); +} + +void thaw_kernel_threads(void) +{ + struct task_struct *g, *p; + + pm_nosig_freezing = false; + printk("Restarting kernel threads ... "); + + thaw_workqueues(); + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (p->flags & (PF_KTHREAD | PF_WQ_WORKER)) + __thaw_task(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + + schedule(); + printk("done.\n"); +} diff --git a/kernel/power/qos.c b/kernel/power/qos.c new file mode 100644 index 00000000..6a031e68 --- /dev/null +++ b/kernel/power/qos.c @@ -0,0 +1,538 @@ +/* + * This module exposes the interface to kernel space for specifying + * QoS dependencies. It provides infrastructure for registration of: + * + * Dependents on a QoS value : register requests + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * There are 3 basic classes of QoS parameter: latency, timeout, throughput + * each have defined units: + * latency: usec + * timeout: usec <-- currently not used. + * throughput: kbs (kilo byte / sec) + * + * There are lists of pm_qos_objects each one wrapping requests, notifiers + * + * User mode requests on a QOS parameter register themselves to the + * subsystem by opening the device node /dev/... and writing there request to + * the node. As long as the process holds a file handle open to the node the + * client continues to be accounted for. Upon file release the usermode + * request is removed and a new qos target is computed. This way when the + * request that the application has is cleaned up when closes the file + * pointer or exits the pm_qos_object will get an opportunity to clean up. + * + * Mark Gross <mgross@linux.intel.com> + */ + +/*#define DEBUG*/ + +#include <linux/pm_qos.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/string.h> +#include <linux/platform_device.h> +#include <linux/init.h> +#include <linux/kernel.h> + +#include <linux/uaccess.h> +#include <linux/export.h> + +/* + * locking rule: all changes to constraints or notifiers lists + * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock + * held, taken with _irqsave. One lock to rule them all + */ +struct pm_qos_object { + struct pm_qos_constraints *constraints; + struct miscdevice pm_qos_power_miscdev; + char *name; +}; + +static DEFINE_SPINLOCK(pm_qos_lock); + +static struct pm_qos_object null_pm_qos; + +static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); +static struct pm_qos_constraints cpu_dma_constraints = { + .list = PLIST_HEAD_INIT(cpu_dma_constraints.list), + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .type = PM_QOS_MIN, + .notifiers = &cpu_dma_lat_notifier, +}; +static struct pm_qos_object cpu_dma_pm_qos = { + .constraints = &cpu_dma_constraints, + .name = "cpu_dma_latency", +}; + +static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); +static struct pm_qos_constraints network_lat_constraints = { + .list = PLIST_HEAD_INIT(network_lat_constraints.list), + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .type = PM_QOS_MIN, + .notifiers = &network_lat_notifier, +}; +static struct pm_qos_object network_lat_pm_qos = { + .constraints = &network_lat_constraints, + .name = "network_latency", +}; + + +static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); +static struct pm_qos_constraints network_tput_constraints = { + .list = PLIST_HEAD_INIT(network_tput_constraints.list), + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .type = PM_QOS_MAX, + .notifiers = &network_throughput_notifier, +}; +static struct pm_qos_object network_throughput_pm_qos = { + .constraints = &network_tput_constraints, + .name = "network_throughput", +}; + + +static struct pm_qos_object *pm_qos_array[] = { + &null_pm_qos, + &cpu_dma_pm_qos, + &network_lat_pm_qos, + &network_throughput_pm_qos +}; + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos); +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos); +static int pm_qos_power_open(struct inode *inode, struct file *filp); +static int pm_qos_power_release(struct inode *inode, struct file *filp); + +static const struct file_operations pm_qos_power_fops = { + .write = pm_qos_power_write, + .read = pm_qos_power_read, + .open = pm_qos_power_open, + .release = pm_qos_power_release, + .llseek = noop_llseek, +}; + +/* unlocked internal variant */ +static inline int pm_qos_get_value(struct pm_qos_constraints *c) +{ + if (plist_head_empty(&c->list)) + return c->default_value; + + switch (c->type) { + case PM_QOS_MIN: + return plist_first(&c->list)->prio; + + case PM_QOS_MAX: + return plist_last(&c->list)->prio; + + default: + /* runtime check for not using enum */ + BUG(); + } +} + +s32 pm_qos_read_value(struct pm_qos_constraints *c) +{ + return c->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) +{ + c->target_value = value; +} + +/** + * pm_qos_update_target - manages the constraints list and calls the notifiers + * if needed + * @c: constraints data struct + * @node: request to add to the list, to update or to remove + * @action: action to take on the constraints list + * @value: value of the request to add or update + * + * This function returns 1 if the aggregated constraint value has changed, 0 + * otherwise. + */ +int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, + enum pm_qos_req_action action, int value) +{ + unsigned long flags; + int prev_value, curr_value, new_value; + + spin_lock_irqsave(&pm_qos_lock, flags); + prev_value = pm_qos_get_value(c); + if (value == PM_QOS_DEFAULT_VALUE) + new_value = c->default_value; + else + new_value = value; + + switch (action) { + case PM_QOS_REMOVE_REQ: + plist_del(node, &c->list); + break; + case PM_QOS_UPDATE_REQ: + /* + * to change the list, we atomically remove, reinit + * with new value and add, then see if the extremal + * changed + */ + plist_del(node, &c->list); + case PM_QOS_ADD_REQ: + plist_node_init(node, new_value); + plist_add(node, &c->list); + break; + default: + /* no action */ + ; + } + + curr_value = pm_qos_get_value(c); + pm_qos_set_value(c, curr_value); + + spin_unlock_irqrestore(&pm_qos_lock, flags); + + if (prev_value != curr_value) { + blocking_notifier_call_chain(c->notifiers, + (unsigned long)curr_value, + NULL); + return 1; + } else { + return 0; + } +} + +/** + * pm_qos_request - returns current system wide qos expectation + * @pm_qos_class: identification of which qos value is requested + * + * This function returns the current target value. + */ +int pm_qos_request(int pm_qos_class) +{ + return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints); +} +EXPORT_SYMBOL_GPL(pm_qos_request); + +int pm_qos_request_active(struct pm_qos_request *req) +{ + return req->pm_qos_class != 0; +} +EXPORT_SYMBOL_GPL(pm_qos_request_active); + +/** + * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout + * @work: work struct for the delayed work (timeout) + * + * This cancels the timeout request by falling back to the default at timeout. + */ +static void pm_qos_work_fn(struct work_struct *work) +{ + struct pm_qos_request *req = container_of(to_delayed_work(work), + struct pm_qos_request, + work); + + pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); +} + +/** + * pm_qos_add_request - inserts new qos request into the list + * @req: pointer to a preallocated handle + * @pm_qos_class: identifies which list of qos request to use + * @value: defines the qos request + * + * This function inserts a new entry in the pm_qos_class list of requested qos + * performance characteristics. It recomputes the aggregate QoS expectations + * for the pm_qos_class of parameters and initializes the pm_qos_request + * handle. Caller needs to save this handle for later use in updates and + * removal. + */ + +void pm_qos_add_request(struct pm_qos_request *req, + int pm_qos_class, s32 value) +{ + if (!req) /*guard against callers passing in null */ + return; + + if (pm_qos_request_active(req)) { + WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); + return; + } + req->pm_qos_class = pm_qos_class; + INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); + pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, + &req->node, PM_QOS_ADD_REQ, value); +} +EXPORT_SYMBOL_GPL(pm_qos_add_request); + +/** + * pm_qos_update_request - modifies an existing qos request + * @req : handle to list element holding a pm_qos request to use + * @value: defines the qos request + * + * Updates an existing qos request for the pm_qos_class of parameters along + * with updating the target pm_qos_class value. + * + * Attempts are made to make this code callable on hot code paths. + */ +void pm_qos_update_request(struct pm_qos_request *req, + s32 new_value) +{ + if (!req) /*guard against callers passing in null */ + return; + + if (!pm_qos_request_active(req)) { + WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); + return; + } + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); +} +EXPORT_SYMBOL_GPL(pm_qos_update_request); + +/** + * pm_qos_update_request_timeout - modifies an existing qos request temporarily. + * @req : handle to list element holding a pm_qos request to use + * @new_value: defines the temporal qos request + * @timeout_us: the effective duration of this qos request in usecs. + * + * After timeout_us, this qos request is cancelled automatically. + */ +void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, + unsigned long timeout_us) +{ + if (!req) + return; + if (WARN(!pm_qos_request_active(req), + "%s called for unknown object.", __func__)) + return; + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); + + schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); +} + +/** + * pm_qos_remove_request - modifies an existing qos request + * @req: handle to request list element + * + * Will remove pm qos request from the list of constraints and + * recompute the current target value for the pm_qos_class. Call this + * on slow code paths. + */ +void pm_qos_remove_request(struct pm_qos_request *req) +{ + if (!req) /*guard against callers passing in null */ + return; + /* silent return to keep pcm code cleaner */ + + if (!pm_qos_request_active(req)) { + WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); + return; + } + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); +} +EXPORT_SYMBOL_GPL(pm_qos_remove_request); + +/** + * pm_qos_add_notifier - sets notification entry for changes to target value + * @pm_qos_class: identifies which qos target changes should be notified. + * @notifier: notifier block managed by caller. + * + * will register the notifier into a notification chain that gets called + * upon changes to the pm_qos_class target value. + */ +int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_register( + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_add_notifier); + +/** + * pm_qos_remove_notifier - deletes notification entry from chain. + * @pm_qos_class: identifies which qos target changes are notified. + * @notifier: notifier block to be removed. + * + * will remove the notifier from the notification chain that gets called + * upon changes to the pm_qos_class target value. + */ +int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_unregister( + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); + +/* User space interface to PM QoS classes via misc devices */ +static int register_pm_qos_misc(struct pm_qos_object *qos) +{ + qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; + qos->pm_qos_power_miscdev.name = qos->name; + qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; + + return misc_register(&qos->pm_qos_power_miscdev); +} + +static int find_pm_qos_object_by_minor(int minor) +{ + int pm_qos_class; + + for (pm_qos_class = 0; + pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { + if (minor == + pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) + return pm_qos_class; + } + return -1; +} + +static int pm_qos_power_open(struct inode *inode, struct file *filp) +{ + long pm_qos_class; + + pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); + if (pm_qos_class >= 0) { + struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); + filp->private_data = req; + + return 0; + } + return -EPERM; +} + +static int pm_qos_power_release(struct inode *inode, struct file *filp) +{ + struct pm_qos_request *req; + + req = filp->private_data; + pm_qos_remove_request(req); + kfree(req); + + return 0; +} + + +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + unsigned long flags; + struct pm_qos_request *req = filp->private_data; + + if (!req) + return -EINVAL; + if (!pm_qos_request_active(req)) + return -EINVAL; + + spin_lock_irqsave(&pm_qos_lock, flags); + value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); +} + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + struct pm_qos_request *req; + + if (count == sizeof(s32)) { + if (copy_from_user(&value, buf, sizeof(s32))) + return -EFAULT; + } else if (count <= 11) { /* ASCII perhaps? */ + char ascii_value[11]; + unsigned long int ulval; + int ret; + + if (copy_from_user(ascii_value, buf, count)) + return -EFAULT; + + if (count > 10) { + if (ascii_value[10] == '\n') + ascii_value[10] = '\0'; + else + return -EINVAL; + } else { + ascii_value[count] = '\0'; + } + ret = strict_strtoul(ascii_value, 16, &ulval); + if (ret) { + pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret); + return -EINVAL; + } + value = (s32)lower_32_bits(ulval); + } else { + return -EINVAL; + } + + req = filp->private_data; + pm_qos_update_request(req, value); + + return count; +} + + +static int __init pm_qos_power_init(void) +{ + int ret = 0; + int i; + + BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); + + for (i = 1; i < PM_QOS_NUM_CLASSES; i++) { + ret = register_pm_qos_misc(pm_qos_array[i]); + if (ret < 0) { + printk(KERN_ERR "pm_qos_param: %s setup failed\n", + pm_qos_array[i]->name); + return ret; + } + } + + return ret; +} + +late_initcall(pm_qos_power_init); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c new file mode 100644 index 00000000..e36df7c3 --- /dev/null +++ b/kernel/power/snapshot.c @@ -0,0 +1,2433 @@ +/* + * linux/kernel/power/snapshot.c + * + * This file provides system snapshot/restore functionality for swsusp. + * + * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/pm.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/syscalls.h> +#include <linux/console.h> +#include <linux/highmem.h> +#include <linux/list.h> +#include <linux/slab.h> + +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/io.h> + +#include "power.h" + +static int swsusp_page_is_free(struct page *); +static void swsusp_set_page_forbidden(struct page *); +static void swsusp_unset_page_forbidden(struct page *); + +/* + * Number of bytes to reserve for memory allocations made by device drivers + * from their ->freeze() and ->freeze_noirq() callbacks so that they don't + * cause image creation to fail (tunable via /sys/power/reserved_size). + */ +unsigned long reserved_size; + +void __init hibernate_reserved_size_init(void) +{ + reserved_size = SPARE_PAGES * PAGE_SIZE; +} + +/* + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. + */ +unsigned long image_size; + +void __init hibernate_image_size_init(void) +{ + image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; +} + +/* List of PBEs needed for restoring the pages that were allocated before + * the suspend and included in the suspend image, but have also been + * allocated by the "resume" kernel, so their contents cannot be written + * directly to their "original" page frames. + */ +struct pbe *restore_pblist; + +/* Pointer to an auxiliary buffer (1 page) */ +static void *buffer; + +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * used before suspend. The unsafe pages have PageNosaveFree set + * and we count them using unsafe_pages. + * + * Each allocated image page is marked as PageNosave and PageNosaveFree + * so that swsusp_free() can release it. + */ + +#define PG_ANY 0 +#define PG_SAFE 1 +#define PG_UNSAFE_CLEAR 1 +#define PG_UNSAFE_KEEP 0 + +static unsigned int allocated_unsafe_pages; + +static void *get_image_page(gfp_t gfp_mask, int safe_needed) +{ + void *res; + + res = (void *)get_zeroed_page(gfp_mask); + if (safe_needed) + while (res && swsusp_page_is_free(virt_to_page(res))) { + /* The page is unsafe, mark it for swsusp_free() */ + swsusp_set_page_forbidden(virt_to_page(res)); + allocated_unsafe_pages++; + res = (void *)get_zeroed_page(gfp_mask); + } + if (res) { + swsusp_set_page_forbidden(virt_to_page(res)); + swsusp_set_page_free(virt_to_page(res)); + } + return res; +} + +unsigned long get_safe_page(gfp_t gfp_mask) +{ + return (unsigned long)get_image_page(gfp_mask, PG_SAFE); +} + +static struct page *alloc_image_page(gfp_t gfp_mask) +{ + struct page *page; + + page = alloc_page(gfp_mask); + if (page) { + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); + } + return page; +} + +/** + * free_image_page - free page represented by @addr, allocated with + * get_image_page (page flags set by it must be cleared) + */ + +static inline void free_image_page(void *addr, int clear_nosave_free) +{ + struct page *page; + + BUG_ON(!virt_addr_valid(addr)); + + page = virt_to_page(addr); + + swsusp_unset_page_forbidden(page); + if (clear_nosave_free) + swsusp_unset_page_free(page); + + __free_page(page); +} + +/* struct linked_page is used to build chains of pages */ + +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) + +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __attribute__((packed)); + +static inline void +free_list_of_pages(struct linked_page *list, int clear_page_nosave) +{ + while (list) { + struct linked_page *lp = list->next; + + free_image_page(list, clear_page_nosave); + list = lp; + } +} + +/** + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. + * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ + +struct chain_allocator { + struct linked_page *chain; /* the chain */ + unsigned int used_space; /* total size of objects allocated out + * of the current page + */ + gfp_t gfp_mask; /* mask for allocating pages */ + int safe_needed; /* if set, only "safe" pages are allocated */ +}; + +static void +chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) +{ + ca->chain = NULL; + ca->used_space = LINKED_PAGE_DATA_SIZE; + ca->gfp_mask = gfp_mask; + ca->safe_needed = safe_needed; +} + +static void *chain_alloc(struct chain_allocator *ca, unsigned int size) +{ + void *ret; + + if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { + struct linked_page *lp; + + lp = get_image_page(ca->gfp_mask, ca->safe_needed); + if (!lp) + return NULL; + + lp->next = ca->chain; + ca->chain = lp; + ca->used_space = 0; + } + ret = ca->chain->data + ca->used_space; + ca->used_space += size; + return ret; +} + +/** + * Data types related to memory bitmaps. + * + * Memory bitmap is a structure consiting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresonds to one zone. For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent each blocks of bitmap in which information is stored. + * + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. + * + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). + * + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. Additionally, it contains the + * pfns that correspond to the start and end of the represented zone. + * + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bitmap) + * It also contains the pfns that correspond to the start and end of + * the represented memory area. + */ + +#define BM_END_OF_MAP (~0UL) + +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) + +struct bm_block { + struct list_head hook; /* hook into a list of bitmap blocks */ + unsigned long start_pfn; /* pfn represented by the first bit */ + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ + unsigned long *data; /* bitmap representing pages */ +}; + +static inline unsigned long bm_block_bits(struct bm_block *bb) +{ + return bb->end_pfn - bb->start_pfn; +} + +/* strcut bm_position is used for browsing memory bitmaps */ + +struct bm_position { + struct bm_block *block; + int bit; +}; + +struct memory_bitmap { + struct list_head blocks; /* list of bitmap blocks */ + struct linked_page *p_list; /* list of pages used to store zone + * bitmap objects and bitmap block + * objects + */ + struct bm_position cur; /* most recently used bit position */ +}; + +/* Functions that operate on memory bitmaps */ + +static void memory_bm_position_reset(struct memory_bitmap *bm) +{ + bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); + bm->cur.bit = 0; +} + +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); + +/** + * create_bm_block_list - create a list of block bitmap objects + * @pages - number of pages to track + * @list - list to put the allocated blocks into + * @ca - chain allocator to be used for allocating memory + */ +static int create_bm_block_list(unsigned long pages, + struct list_head *list, + struct chain_allocator *ca) +{ + unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); + + while (nr_blocks-- > 0) { + struct bm_block *bb; + + bb = chain_alloc(ca, sizeof(struct bm_block)); + if (!bb) + return -ENOMEM; + list_add(&bb->hook, list); + } + + return 0; +} + +struct mem_extent { + struct list_head hook; + unsigned long start; + unsigned long end; +}; + +/** + * free_mem_extents - free a list of memory extents + * @list - list of extents to empty + */ +static void free_mem_extents(struct list_head *list) +{ + struct mem_extent *ext, *aux; + + list_for_each_entry_safe(ext, aux, list, hook) { + list_del(&ext->hook); + kfree(ext); + } +} + +/** + * create_mem_extents - create a list of memory extents representing + * contiguous ranges of PFNs + * @list - list to put the extents into + * @gfp_mask - mask to use for memory allocations + */ +static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) +{ + struct zone *zone; + + INIT_LIST_HEAD(list); + + for_each_populated_zone(zone) { + unsigned long zone_start, zone_end; + struct mem_extent *ext, *cur, *aux; + + zone_start = zone->zone_start_pfn; + zone_end = zone->zone_start_pfn + zone->spanned_pages; + + list_for_each_entry(ext, list, hook) + if (zone_start <= ext->end) + break; + + if (&ext->hook == list || zone_end < ext->start) { + /* New extent is necessary */ + struct mem_extent *new_ext; + + new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); + if (!new_ext) { + free_mem_extents(list); + return -ENOMEM; + } + new_ext->start = zone_start; + new_ext->end = zone_end; + list_add_tail(&new_ext->hook, &ext->hook); + continue; + } + + /* Merge this zone's range of PFNs with the existing one */ + if (zone_start < ext->start) + ext->start = zone_start; + if (zone_end > ext->end) + ext->end = zone_end; + + /* More merging may be possible */ + cur = ext; + list_for_each_entry_safe_continue(cur, aux, list, hook) { + if (zone_end < cur->start) + break; + if (zone_end < cur->end) + ext->end = cur->end; + list_del(&cur->hook); + kfree(cur); + } + } + + return 0; +} + +/** + * memory_bm_create - allocate memory for a memory bitmap + */ +static int +memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) +{ + struct chain_allocator ca; + struct list_head mem_extents; + struct mem_extent *ext; + int error; + + chain_init(&ca, gfp_mask, safe_needed); + INIT_LIST_HEAD(&bm->blocks); + + error = create_mem_extents(&mem_extents, gfp_mask); + if (error) + return error; + + list_for_each_entry(ext, &mem_extents, hook) { + struct bm_block *bb; + unsigned long pfn = ext->start; + unsigned long pages = ext->end - ext->start; + + bb = list_entry(bm->blocks.prev, struct bm_block, hook); + + error = create_bm_block_list(pages, bm->blocks.prev, &ca); + if (error) + goto Error; + + list_for_each_entry_continue(bb, &bm->blocks, hook) { + bb->data = get_image_page(gfp_mask, safe_needed); + if (!bb->data) { + error = -ENOMEM; + goto Error; + } + + bb->start_pfn = pfn; + if (pages >= BM_BITS_PER_BLOCK) { + pfn += BM_BITS_PER_BLOCK; + pages -= BM_BITS_PER_BLOCK; + } else { + /* This is executed only once in the loop */ + pfn += pages; + } + bb->end_pfn = pfn; + } + } + + bm->p_list = ca.chain; + memory_bm_position_reset(bm); + Exit: + free_mem_extents(&mem_extents); + return error; + + Error: + bm->p_list = ca.chain; + memory_bm_free(bm, PG_UNSAFE_CLEAR); + goto Exit; +} + +/** + * memory_bm_free - free memory occupied by the memory bitmap @bm + */ +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) +{ + struct bm_block *bb; + + list_for_each_entry(bb, &bm->blocks, hook) + if (bb->data) + free_image_page(bb->data, clear_nosave_free); + + free_list_of_pages(bm->p_list, clear_nosave_free); + + INIT_LIST_HEAD(&bm->blocks); +} + +/** + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds + * to given pfn. The cur_zone_bm member of @bm and the cur_block member + * of @bm->cur_zone_bm are updated. + */ +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) +{ + struct bm_block *bb; + + /* + * Check if the pfn corresponds to the current bitmap block and find + * the block where it fits if this is not the case. + */ + bb = bm->cur.block; + if (pfn < bb->start_pfn) + list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) + if (pfn >= bb->start_pfn) + break; + + if (pfn >= bb->end_pfn) + list_for_each_entry_continue(bb, &bm->blocks, hook) + if (pfn >= bb->start_pfn && pfn < bb->end_pfn) + break; + + if (&bb->hook == &bm->blocks) + return -EFAULT; + + /* The block has been found */ + bm->cur.block = bb; + pfn -= bb->start_pfn; + bm->cur.bit = pfn + 1; + *bit_nr = pfn; + *addr = bb->data; + return 0; +} + +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + set_bit(bit, addr); +} + +static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + if (!error) + set_bit(bit, addr); + return error; +} + +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + clear_bit(bit, addr); +} + +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + return test_bit(bit, addr); +} + +static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + return !memory_bm_find_bit(bm, pfn, &addr, &bit); +} + +/** + * memory_bm_next_pfn - find the pfn that corresponds to the next set bit + * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is + * returned. + * + * It is required to run memory_bm_position_reset() before the first call to + * this function. + */ + +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) +{ + struct bm_block *bb; + int bit; + + bb = bm->cur.block; + do { + bit = bm->cur.bit; + bit = find_next_bit(bb->data, bm_block_bits(bb), bit); + if (bit < bm_block_bits(bb)) + goto Return_pfn; + + bb = list_entry(bb->hook.next, struct bm_block, hook); + bm->cur.block = bb; + bm->cur.bit = 0; + } while (&bb->hook != &bm->blocks); + + memory_bm_position_reset(bm); + return BM_END_OF_MAP; + + Return_pfn: + bm->cur.bit = bit + 1; + return bb->start_pfn + bit; +} + +/** + * This structure represents a range of page frames the contents of which + * should not be saved during the suspend. + */ + +struct nosave_region { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static LIST_HEAD(nosave_regions); + +/** + * register_nosave_region - register a range of page frames the contents + * of which should not be saved during the suspend (to be used in the early + * initialization code) + */ + +void __init +__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, + int use_kmalloc) +{ + struct nosave_region *region; + + if (start_pfn >= end_pfn) + return; + + if (!list_empty(&nosave_regions)) { + /* Try to extend the previous region (they should be sorted) */ + region = list_entry(nosave_regions.prev, + struct nosave_region, list); + if (region->end_pfn == start_pfn) { + region->end_pfn = end_pfn; + goto Report; + } + } + if (use_kmalloc) { + /* during init, this shouldn't fail */ + region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); + BUG_ON(!region); + } else + /* This allocation cannot fail */ + region = alloc_bootmem(sizeof(struct nosave_region)); + region->start_pfn = start_pfn; + region->end_pfn = end_pfn; + list_add_tail(®ion->list, &nosave_regions); + Report: + printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n", + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); +} + +/* + * Set bits in this map correspond to the page frames the contents of which + * should not be saved during the suspend. + */ +static struct memory_bitmap *forbidden_pages_map; + +/* Set bits in this map correspond to free page frames. */ +static struct memory_bitmap *free_pages_map; + +/* + * Each page frame allocated for creating the image is marked by setting the + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously + */ + +void swsusp_set_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_set_bit(free_pages_map, page_to_pfn(page)); +} + +static int swsusp_page_is_free(struct page *page) +{ + return free_pages_map ? + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; +} + +void swsusp_unset_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); +} + +static void swsusp_set_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); +} + +int swsusp_page_is_forbidden(struct page *page) +{ + return forbidden_pages_map ? + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; +} + +static void swsusp_unset_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); +} + +/** + * mark_nosave_pages - set bits corresponding to the page frames the + * contents of which should not be saved in a given bitmap. + */ + +static void mark_nosave_pages(struct memory_bitmap *bm) +{ + struct nosave_region *region; + + if (list_empty(&nosave_regions)) + return; + + list_for_each_entry(region, &nosave_regions, list) { + unsigned long pfn; + + pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", + (unsigned long long) region->start_pfn << PAGE_SHIFT, + ((unsigned long long) region->end_pfn << PAGE_SHIFT) + - 1); + + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) + if (pfn_valid(pfn)) { + /* + * It is safe to ignore the result of + * mem_bm_set_bit_check() here, since we won't + * touch the PFNs for which the error is + * returned anyway. + */ + mem_bm_set_bit_check(bm, pfn); + } + } +} + +/** + * create_basic_memory_bitmaps - create bitmaps needed for marking page + * frames that should not be saved and free page frames. The pointers + * forbidden_pages_map and free_pages_map are only modified if everything + * goes well, because we don't want the bits to be used before both bitmaps + * are set up. + */ + +int create_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + int error = 0; + + BUG_ON(forbidden_pages_map || free_pages_map); + + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm1) + return -ENOMEM; + + error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); + if (error) + goto Free_first_object; + + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm2) + goto Free_first_bitmap; + + error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); + if (error) + goto Free_second_object; + + forbidden_pages_map = bm1; + free_pages_map = bm2; + mark_nosave_pages(forbidden_pages_map); + + pr_debug("PM: Basic memory bitmaps created\n"); + + return 0; + + Free_second_object: + kfree(bm2); + Free_first_bitmap: + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + Free_first_object: + kfree(bm1); + return -ENOMEM; +} + +/** + * free_basic_memory_bitmaps - free memory bitmaps allocated by + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary + * so that the bitmaps themselves are not referred to while they are being + * freed. + */ + +void free_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + + BUG_ON(!(forbidden_pages_map && free_pages_map)); + + bm1 = forbidden_pages_map; + bm2 = free_pages_map; + forbidden_pages_map = NULL; + free_pages_map = NULL; + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + kfree(bm1); + memory_bm_free(bm2, PG_UNSAFE_CLEAR); + kfree(bm2); + + pr_debug("PM: Basic memory bitmaps freed\n"); +} + +/** + * snapshot_additional_pages - estimate the number of additional pages + * be needed for setting up the suspend image data structures for given + * zone (usually the returned value is greater than the exact number) + */ + +unsigned int snapshot_additional_pages(struct zone *zone) +{ + unsigned int res; + + res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + res += DIV_ROUND_UP(res * sizeof(struct bm_block), + LINKED_PAGE_DATA_SIZE); + return 2 * res; +} + +#ifdef CONFIG_HIGHMEM +/** + * count_free_highmem_pages - compute the total number of free highmem + * pages, system-wide. + */ + +static unsigned int count_free_highmem_pages(void) +{ + struct zone *zone; + unsigned int cnt = 0; + + for_each_populated_zone(zone) + if (is_highmem(zone)) + cnt += zone_page_state(zone, NR_FREE_PAGES); + + return cnt; +} + +/** + * saveable_highmem_page - Determine whether a highmem page should be + * included in the suspend image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't a part of a free chunk of pages. + */ +static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + return NULL; + + BUG_ON(!PageHighMem(page)); + + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || + PageReserved(page)) + return NULL; + + if (page_is_guard(page)) + return NULL; + + return page; +} + +/** + * count_highmem_pages - compute the total number of saveable highmem + * pages. + */ + +static unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned int n = 0; + + for_each_populated_zone(zone) { + unsigned long pfn, max_zone_pfn; + + if (!is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_highmem_page(zone, pfn)) + n++; + } + return n; +} +#else +static inline void *saveable_highmem_page(struct zone *z, unsigned long p) +{ + return NULL; +} +#endif /* CONFIG_HIGHMEM */ + +/** + * saveable_page - Determine whether a non-highmem page should be included + * in the suspend image. + * + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't a part of + * a free chunk of pages. + */ +static struct page *saveable_page(struct zone *zone, unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + return NULL; + + BUG_ON(PageHighMem(page)); + + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) + return NULL; + + if (PageReserved(page) + && (!kernel_page_present(page) || pfn_is_nosave(pfn))) + return NULL; + + if (page_is_guard(page)) + return NULL; + + return page; +} + +/** + * count_data_pages - compute the total number of saveable non-highmem + * pages. + */ + +static unsigned int count_data_pages(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + unsigned int n = 0; + + for_each_populated_zone(zone) { + if (is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_page(zone, pfn)) + n++; + } + return n; +} + +/* This is needed, because copy_page and memcpy are not usable for copying + * task structs. + */ +static inline void do_copy_page(long *dst, long *src) +{ + int n; + + for (n = PAGE_SIZE / sizeof(long); n; n--) + *dst++ = *src++; +} + + +/** + * safe_copy_page - check if the page we are going to copy is marked as + * present in the kernel page tables (this always is the case if + * CONFIG_DEBUG_PAGEALLOC is not set and in that case + * kernel_page_present() always returns 'true'). + */ +static void safe_copy_page(void *dst, struct page *s_page) +{ + if (kernel_page_present(s_page)) { + do_copy_page(dst, page_address(s_page)); + } else { + kernel_map_pages(s_page, 1, 1); + do_copy_page(dst, page_address(s_page)); + kernel_map_pages(s_page, 1, 0); + } +} + + +#ifdef CONFIG_HIGHMEM +static inline struct page * +page_is_saveable(struct zone *zone, unsigned long pfn) +{ + return is_highmem(zone) ? + saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); +} + +static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + struct page *s_page, *d_page; + void *src, *dst; + + s_page = pfn_to_page(src_pfn); + d_page = pfn_to_page(dst_pfn); + if (PageHighMem(s_page)) { + src = kmap_atomic(s_page); + dst = kmap_atomic(d_page); + do_copy_page(dst, src); + kunmap_atomic(dst); + kunmap_atomic(src); + } else { + if (PageHighMem(d_page)) { + /* Page pointed to by src may contain some kernel + * data modified by kmap_atomic() + */ + safe_copy_page(buffer, s_page); + dst = kmap_atomic(d_page); + copy_page(dst, buffer); + kunmap_atomic(dst); + } else { + safe_copy_page(page_address(d_page), s_page); + } + } +} +#else +#define page_is_saveable(zone, pfn) saveable_page(zone, pfn) + +static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + safe_copy_page(page_address(pfn_to_page(dst_pfn)), + pfn_to_page(src_pfn)); +} +#endif /* CONFIG_HIGHMEM */ + +static void +copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +{ + struct zone *zone; + unsigned long pfn; + #ifdef _PRINT_PAGE_CRC_ //for debug.by roger. + unsigned long crc_le; + unsigned char *virt_addr; + #endif + + for_each_populated_zone(zone) { + unsigned long max_zone_pfn; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (page_is_saveable(zone, pfn)) + memory_bm_set_bit(orig_bm, pfn); + } + memory_bm_position_reset(orig_bm); + memory_bm_position_reset(copy_bm); + for(;;) { + pfn = memory_bm_next_pfn(orig_bm); + if (unlikely(pfn == BM_END_OF_MAP)) + break; + copy_data_page(memory_bm_next_pfn(copy_bm), pfn); + #ifdef _PRINT_PAGE_CRC_ //for debug.by roger. + virt_addr = page_address(pfn_to_page(pfn)); + crc_le = crc32_le(0, virt_addr, PAGE_SIZE); + printk("pfn:%lu:phy_addr:0x%x:crc:%lu:virt_addr:0x%x\n", + pfn, pfn*PAGE_SIZE, crc_le, virt_addr); + #endif + } +} + +/* Total number of image pages */ +static unsigned int nr_copy_pages; +/* Number of pages needed for saving the original pfns of the image pages */ +static unsigned int nr_meta_pages; +/* + * Numbers of normal and highmem page frames allocated for hibernation image + * before suspending devices. + */ +unsigned int alloc_normal, alloc_highmem; +/* + * Memory bitmap used for marking saveable pages (during hibernation) or + * hibernation image pages (during restore) + */ +static struct memory_bitmap orig_bm; +/* + * Memory bitmap used during hibernation for marking allocated page frames that + * will contain copies of saveable pages. During restore it is initially used + * for marking hibernation image pages, but then the set bits from it are + * duplicated in @orig_bm and it is released. On highmem systems it is next + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. + */ +static struct memory_bitmap copy_bm; + +/** + * swsusp_free - free pages allocated for the suspend. + * + * Suspend pages are alocated before the atomic copy is made, so we + * need to release them after the resume. + */ + +void swsusp_free_page(void *buffer) +{ + if (swsusp_page_is_forbidden(virt_to_page(buffer)) && + swsusp_page_is_free(virt_to_page(buffer))) { + swsusp_unset_page_forbidden(virt_to_page(buffer)); + swsusp_unset_page_free(virt_to_page(buffer)); + __free_page(virt_to_page(buffer)); + } +} + +void swsusp_free(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + + for_each_populated_zone(zone) { + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (swsusp_page_is_forbidden(page) && + swsusp_page_is_free(page)) { + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); + __free_page(page); + } + } + } + nr_copy_pages = 0; + nr_meta_pages = 0; + restore_pblist = NULL; + buffer = NULL; + alloc_normal = 0; + alloc_highmem = 0; +} + +/* Helper functions used for the shrinking of memory. */ + +#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) + +/** + * preallocate_image_pages - Allocate a number of pages for hibernation image + * @nr_pages: Number of page frames to allocate. + * @mask: GFP flags to use for the allocation. + * + * Return value: Number of page frames actually allocated + */ +static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) +{ + unsigned long nr_alloc = 0; + + while (nr_pages > 0) { + struct page *page; + + page = alloc_image_page(mask); + if (!page) + break; + memory_bm_set_bit(©_bm, page_to_pfn(page)); + if (PageHighMem(page)) + alloc_highmem++; + else + alloc_normal++; + nr_pages--; + nr_alloc++; + } + + return nr_alloc; +} + +static unsigned long preallocate_image_memory(unsigned long nr_pages, + unsigned long avail_normal) +{ + unsigned long alloc; + + if (avail_normal <= alloc_normal) + return 0; + + alloc = avail_normal - alloc_normal; + if (nr_pages < alloc) + alloc = nr_pages; + + return preallocate_image_pages(alloc, GFP_IMAGE); +} + +#ifdef CONFIG_HIGHMEM +static unsigned long preallocate_image_highmem(unsigned long nr_pages) +{ + return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); +} + +/** + * __fraction - Compute (an approximation of) x * (multiplier / base) + */ +static unsigned long __fraction(u64 x, u64 multiplier, u64 base) +{ + x *= multiplier; + do_div(x, base); + return (unsigned long)x; +} + +static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + unsigned long alloc = __fraction(nr_pages, highmem, total); + + return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); +} +#else /* CONFIG_HIGHMEM */ +static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) +{ + return 0; +} + +static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + return 0; +} +#endif /* CONFIG_HIGHMEM */ + +/** + * free_unnecessary_pages - Release preallocated pages not needed for the image + */ +static void free_unnecessary_pages(void) +{ + unsigned long save, to_free_normal, to_free_highmem; + + save = count_data_pages(); + if (alloc_normal >= save) { + to_free_normal = alloc_normal - save; + save = 0; + } else { + to_free_normal = 0; + save -= alloc_normal; + } + save += count_highmem_pages(); + if (alloc_highmem >= save) { + to_free_highmem = alloc_highmem - save; + } else { + to_free_highmem = 0; + save -= alloc_highmem; + if (to_free_normal > save) + to_free_normal -= save; + else + to_free_normal = 0; + } + + memory_bm_position_reset(©_bm); + + while (to_free_normal > 0 || to_free_highmem > 0) { + unsigned long pfn = memory_bm_next_pfn(©_bm); + struct page *page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (!to_free_highmem) + continue; + to_free_highmem--; + alloc_highmem--; + } else { + if (!to_free_normal) + continue; + to_free_normal--; + alloc_normal--; + } + memory_bm_clear_bit(©_bm, pfn); + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); + __free_page(page); + } +} + +/** + * minimum_image_size - Estimate the minimum acceptable size of an image + * @saveable: Number of saveable pages in the system. + * + * We want to avoid attempting to free too much memory too hard, so estimate the + * minimum acceptable size of a hibernation image to use as the lower limit for + * preallocating memory. + * + * We assume that the minimum image size should be proportional to + * + * [number of saveable pages] - [number of pages that can be freed in theory] + * + * where the second term is the sum of (1) reclaimable slab pages, (2) active + * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages, + * minus mapped file pages. + */ +static unsigned long minimum_image_size(unsigned long saveable) +{ + unsigned long size; + + size = global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_FILE) + - global_page_state(NR_FILE_MAPPED); + + return saveable <= size ? 0 : saveable - size; +} + +/** + * hibernate_preallocate_memory - Preallocate memory for hibernation image + * + * To create a hibernation image it is necessary to make a copy of every page + * frame in use. We also need a number of page frames to be free during + * hibernation for allocations made while saving the image and for device + * drivers, in case they need to allocate memory from their hibernation + * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough + * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * /sys/power/reserved_size, respectively). To make this happen, we compute the + * total number of available page frames and allocate at least + * + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) + * + * of them, which corresponds to the maximum size of a hibernation image. + * + * If image_size is set below the number following from the above formula, + * the preallocation of memory is continued until the total number of saveable + * pages in the system is below the requested image size or the minimum + * acceptable image size returned by minimum_image_size(), whichever is greater. + */ +int hibernate_preallocate_memory(void) +{ + struct zone *zone; + unsigned long saveable, size, max_size, count, highmem, pages = 0; + unsigned long alloc, save_highmem, pages_highmem, avail_normal; + struct timeval start, stop; + int error; + unsigned long shrinked_size;//for shrink_all_memory + unsigned long shrink_size; //for shrink_all_memory + int i; //for shrink_all_memory + + printk(KERN_INFO "PM: Preallocating image memory... "); + do_gettimeofday(&start); + + error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; + + error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; + + alloc_normal = 0; + alloc_highmem = 0; + + /* Count the number of saveable data pages. */ + save_highmem = count_highmem_pages(); + saveable = count_data_pages(); + + /* + * Compute the total number of page frames we can use (count) and the + * number of pages needed for image metadata (size). + */ + count = saveable; + saveable += save_highmem; + highmem = save_highmem; + size = 0; + for_each_populated_zone(zone) { + size += snapshot_additional_pages(zone); + if (is_highmem(zone)) + highmem += zone_page_state(zone, NR_FREE_PAGES); + else + count += zone_page_state(zone, NR_FREE_PAGES); + } + avail_normal = count; + count += highmem; + count -= totalreserve_pages; + + /* Add number of pages required for page keys (s390 only). */ + size += page_key_additional_pages(saveable); + + /* Compute the maximum number of saveable pages to leave in memory. */ + max_size = (count - (size + PAGES_FOR_IO)) / 2 + - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); + /* Compute the desired number of image pages specified by image_size. */ + size = DIV_ROUND_UP(image_size, PAGE_SIZE); + if (size > max_size) + size = max_size; + printk(KERN_CRIT"totalreserve_pages:%lu MB : "\ + "reserved_size:%lu MB\n", + totalreserve_pages/256, reserved_size>>20); +#if 0 + /*Skip this part to Force to run shrink_all_memory*/ + /* + * If the desired number of image pages is at least as large as the + * current number of saveable pages in memory, allocate page frames for + * the image and we're done. + */ + if (size >= saveable) { + pages = preallocate_image_highmem(save_highmem); + pages += preallocate_image_memory(saveable - pages, avail_normal); + goto out; + } +#endif + /* Estimate the minimum size of the image. */ + pages = minimum_image_size(saveable); + printk(KERN_CRIT"minimum_image_size:%lu MB\n", pages/256); + /* + * To avoid excessive pressure on the normal zone, leave room in it to + * accommodate an image of the minimum size (unless it's already too + * small, in which case don't preallocate pages from it at all). + */ + if (avail_normal > pages) + avail_normal -= pages; + else + avail_normal = 0; +#if 0 + if (size < pages) + size = min_t(unsigned long, pages, max_size); +#else + size = pages;//make sure size=minimum_image_size so that shrink_size is valid. +#endif + /* + * Let the memory management subsystem know that we're going to need a + * large number of page frames to allocate and make it free some memory. + * NOTE: If this is not done, performance will be hurt badly in some + * test cases. + */ + shrink_size = (saveable - size) >0 ? (saveable - size) : 0; + + printk(KERN_CRIT "\nsaveable:%lu MB : size:%lu MB : shrink_size : %lu MB\n", + saveable/256, size/256, shrink_size/256); + + for (shrinked_size=0, i=0; i<5; i++){ + shrinked_size += shrink_all_memory(shrink_size); + } + printk(KERN_CRIT "shrink_all_memory: %lu MB\n", shrinked_size/256); + /* + * The number of saveable pages in memory was too high, so apply some + * pressure to decrease it. First, make room for the largest possible + * image and fail if that doesn't work. Next, try to decrease the size + * of the image as much as indicated by 'size' using allocations from + * highmem and non-highmem zones separately. + */ + pages_highmem = preallocate_image_highmem(highmem / 2); + //alloc = (count - max_size) - pages_highmem; //original, which wastes memory. + //alloc = saveable - shrinked_size - pages_highmem; //only alloc we actually need. + alloc = (count - max_size) > (saveable - shrinked_size) ? + (saveable - shrinked_size) - pages_highmem + :(count - max_size) - pages_highmem; + pages = preallocate_image_memory(alloc, avail_normal); + printk("\n%s:alloc(MB):%lu:count(MB):%lu:max_size(MB):%lu:pages_highmem(MB):%lu\n", + __FUNCTION__, alloc/256, count/256, max_size/256, pages_highmem/256); + printk("%s:prealloc pages(MB):%lu:avail_normal(MB):%lu\n", __FUNCTION__, pages/256, avail_normal/256); + + if (pages < alloc) { + printk(KERN_CRIT"Not Enough Pages in LowMem. Trying HighMem\n"); + /* We have exhausted non-highmem pages, try highmem. */ + alloc -= pages; + pages += pages_highmem; + pages_highmem = preallocate_image_highmem(alloc); + if (pages_highmem < alloc){ + printk(KERN_CRIT"Can not Preallocate Enough Pages.\n"); + goto err_out; + } + pages += pages_highmem; + /* + * size is the desired number of saveable pages to leave in + * memory, so try to preallocate (all memory - size) pages. + */ + alloc = (count - pages) - size; + pages += preallocate_image_highmem(alloc); + } else { + /* + * There are approximately max_size saveable pages at this point + * and we want to reduce this number down to size. + */ + #if 0 + /*skip this to avoid Wasting too much time on shrinking to + reduce image size + */ + alloc = max_size - size; + size = preallocate_highmem_fraction(alloc, highmem, count); + pages_highmem += size; + alloc -= size; + size = preallocate_image_memory(alloc, avail_normal); + pages_highmem += preallocate_image_highmem(alloc - size); + pages += pages_highmem + size; + #endif + } + + /* + * We only need as many page frames for the image as there are saveable + * pages in memory, but we have allocated more. Release the excessive + * ones now. + */ + free_unnecessary_pages(); + + out: + do_gettimeofday(&stop); + printk(KERN_CONT "done (allocated %lu pages)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Allocated"); + + return 0; + + err_out: + printk(KERN_CONT "Can not Pre-Allocate Enough Pages\n"); + swsusp_free(); + return -ENOMEM; +} + +#ifdef CONFIG_HIGHMEM +/** + * count_pages_for_highmem - compute the number of non-highmem pages + * that will be necessary for creating copies of highmem pages. + */ + +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) +{ + unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; + + if (free_highmem >= nr_highmem) + nr_highmem = 0; + else + nr_highmem -= free_highmem; + + return nr_highmem; +} +#else +static unsigned int +count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * enough_free_mem - Make sure we have enough free memory for the + * snapshot image. + */ + +static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) +{ + struct zone *zone; + unsigned int free = alloc_normal; + + for_each_populated_zone(zone) + if (!is_highmem(zone)) + free += zone_page_state(zone, NR_FREE_PAGES); + + nr_pages += count_pages_for_highmem(nr_highmem); + pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, free); + + return free > nr_pages + PAGES_FOR_IO; +} + +#ifdef CONFIG_HIGHMEM +/** + * get_highmem_buffer - if there are some highmem pages in the suspend + * image, we may need the buffer to copy them and/or load their data. + */ + +static inline int get_highmem_buffer(int safe_needed) +{ + buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); + return buffer ? 0 : -ENOMEM; +} + +/** + * alloc_highmem_image_pages - allocate some highmem pages for the image. + * Try to allocate as many pages as needed, but if the number of free + * highmem pages is lesser than that, allocate them all. + */ + +static inline unsigned int +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +{ + unsigned int to_alloc = count_free_highmem_pages(); + + if (to_alloc > nr_highmem) + to_alloc = nr_highmem; + + nr_highmem -= to_alloc; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_image_page(__GFP_HIGHMEM); + memory_bm_set_bit(bm, page_to_pfn(page)); + } + return nr_highmem; +} +#else +static inline int get_highmem_buffer(int safe_needed) { return 0; } + +static inline unsigned int +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * swsusp_alloc - allocate memory for the suspend image + * + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. + * + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. + */ + +static int +swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, + unsigned int nr_pages, unsigned int nr_highmem) +{ + if (nr_highmem > 0) { + if (get_highmem_buffer(PG_ANY)) + goto err_out; + if (nr_highmem > alloc_highmem) { + nr_highmem -= alloc_highmem; + nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); + } + } + if (nr_pages > alloc_normal) { + nr_pages -= alloc_normal; + while (nr_pages-- > 0) { + struct page *page; + + page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); + if (!page) + goto err_out; + memory_bm_set_bit(copy_bm, page_to_pfn(page)); + } + } + + return 0; + + err_out: + swsusp_free(); + return -ENOMEM; +} + +asmlinkage int swsusp_save(void) +{ + unsigned int nr_pages, nr_highmem; + + printk(KERN_INFO "PM: Creating hibernation image:\n"); + + drain_local_pages(NULL); + nr_pages = count_data_pages(); + nr_highmem = count_highmem_pages(); + printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); + + if (!enough_free_mem(nr_pages, nr_highmem)) { + printk(KERN_ERR "PM: Not enough free memory\n"); + return -ENOMEM; + } + + if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { + printk(KERN_ERR "PM: Memory allocation failed\n"); + return -ENOMEM; + } + + /* During allocating of suspend pagedir, new cold pages may appear. + * Kill them. + */ + drain_local_pages(NULL); + copy_data_pages(©_bm, &orig_bm); + + /* + * End of critical section. From now on, we can write to memory, + * but we should not touch disk. This specially means we must _not_ + * touch swap space! Except we must write out our image of course. + */ + + nr_pages += nr_highmem; + nr_copy_pages = nr_pages; + nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); + + printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n", + nr_pages); + + return 0; +} + +#ifndef CONFIG_ARCH_HIBERNATION_HEADER +static int init_header_complete(struct swsusp_info *info) +{ + memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); + info->version_code = LINUX_VERSION_CODE; + swsusp_arch_add_info(info->archdata, sizeof(info->archdata));//save extra info for u-boot resume. + return 0; +} + +static char *check_image_kernel(struct swsusp_info *info) +{ + extern pgd_t *idmap_pgd;/*check if idmap_pgd changed.*/ + + if (info->version_code != LINUX_VERSION_CODE) + return "kernel version"; + if (strcmp(info->uts.sysname,init_utsname()->sysname)) + return "system type"; + if (strcmp(info->uts.release,init_utsname()->release)) + return "kernel release"; + if (strcmp(info->uts.version,init_utsname()->version)) + return "version"; + if (strcmp(info->uts.machine,init_utsname()->machine)) + return "machine"; + /* If idmap_pgd changed, mmu page table will be corrupted during hibernation restore */ + printk("\n check_image_kernel:\n info->archdata[4]=0x%x, idmap_pgd=0x%x\n", ((u32 *)info->archdata)[4],(u32)idmap_pgd); + if (((u32 *)info->archdata)[4] != (u32)idmap_pgd) + return "idmap_pgd changed"; + return NULL; +} +#endif /* CONFIG_ARCH_HIBERNATION_HEADER */ + +void __weak swsusp_arch_add_info(char *archdata, size_t size) {} + +unsigned long snapshot_get_image_size(void) +{ + return nr_copy_pages + nr_meta_pages + 1; +} + +static int init_header(struct swsusp_info *info) +{ + memset(info, 0, sizeof(struct swsusp_info)); + info->num_physpages = num_physpages; + info->image_pages = nr_copy_pages; + info->pages = snapshot_get_image_size(); + info->size = info->pages; + info->size <<= PAGE_SHIFT; + return init_header_complete(info); +} + +/** + * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm + * are stored in the array @buf[] (1 page at a time) + */ + +static inline void +pack_pfns(unsigned long *buf, struct memory_bitmap *bm) +{ + int j; + + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + buf[j] = memory_bm_next_pfn(bm); + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + /* Save page key for data page (s390 only). */ + page_key_read(buf + j); + } +} + +/** + * snapshot_read_next - used for reading the system memory snapshot. + * + * On the first call to it @handle should point to a zeroed + * snapshot_handle structure. The structure gets updated and a pointer + * to it should be passed to this function every next time. + * + * On success the function returns a positive number. Then, the caller + * is allowed to read up to the returned number of bytes from the memory + * location computed by the data_of() macro. + * + * The function returns 0 to indicate the end of data stream condition, + * and a negative number is returned on error. In such cases the + * structure pointed to by @handle is not updated and should not be used + * any more. + */ + +int snapshot_read_next(struct snapshot_handle *handle) +{ + if (handle->cur > nr_meta_pages + nr_copy_pages) + return 0; + + if (!buffer) { + /* This makes the buffer be freed by swsusp_free() */ + buffer = get_image_page(GFP_ATOMIC, PG_ANY); + if (!buffer) + return -ENOMEM; + } + if (!handle->cur) { + int error; + + error = init_header((struct swsusp_info *)buffer); + if (error) + return error; + handle->buffer = buffer; + memory_bm_position_reset(&orig_bm); + memory_bm_position_reset(©_bm); + } else if (handle->cur <= nr_meta_pages) { + clear_page(buffer); + pack_pfns(buffer, &orig_bm); + } else { + struct page *page; + + page = pfn_to_page(memory_bm_next_pfn(©_bm)); + if (PageHighMem(page)) { + /* Highmem pages are copied to the buffer, + * because we can't return with a kmapped + * highmem page (we may not be called again). + */ + void *kaddr; + + kaddr = kmap_atomic(page); + copy_page(buffer, kaddr); + kunmap_atomic(kaddr); + handle->buffer = buffer; + } else { + handle->buffer = page_address(page); + } + } + handle->cur++; + return PAGE_SIZE; +} + +/** + * mark_unsafe_pages - mark the pages that cannot be used for storing + * the image during resume, because they conflict with the pages that + * had been used before suspend + */ + +static int mark_unsafe_pages(struct memory_bitmap *bm) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + + /* Clear page flags */ + for_each_populated_zone(zone) { + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) + swsusp_unset_page_free(pfn_to_page(pfn)); + } + + /* Mark pages that correspond to the "original" pfns as "unsafe" */ + memory_bm_position_reset(bm); + do { + pfn = memory_bm_next_pfn(bm); + if (likely(pfn != BM_END_OF_MAP)) { + if (likely(pfn_valid(pfn))) + swsusp_set_page_free(pfn_to_page(pfn)); + else + return -EFAULT; + } + } while (pfn != BM_END_OF_MAP); + + allocated_unsafe_pages = 0; + + return 0; +} + +static void +duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) +{ + unsigned long pfn; + + memory_bm_position_reset(src); + pfn = memory_bm_next_pfn(src); + while (pfn != BM_END_OF_MAP) { + memory_bm_set_bit(dst, pfn); + pfn = memory_bm_next_pfn(src); + } +} + +static int check_header(struct swsusp_info *info) +{ + char *reason; + + reason = check_image_kernel(info); + if (!reason && info->num_physpages != num_physpages) + reason = "memory size"; + if (reason) { + printk(KERN_ERR "PM: Image mismatch: %s\n", reason); + return -EPERM; + } + return 0; +} + +/** + * load header - check the image header and copy data from it + */ + +static int +load_header(struct swsusp_info *info) +{ + int error; + + restore_pblist = NULL; + error = check_header(info); + if (!error) { + nr_copy_pages = info->image_pages; + nr_meta_pages = info->pages - info->image_pages - 1; + } + return error; +} + +/** + * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set + * the corresponding bit in the memory bitmap @bm + */ +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) +{ + int j; + + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + + /* Extract and buffer page key for data page (s390 only). */ + page_key_memorize(buf + j); + + if (memory_bm_pfn_present(bm, buf[j])) + memory_bm_set_bit(bm, buf[j]); + else + return -EFAULT; + } + + return 0; +} + +/* List of "safe" pages that may be used to store data loaded from the suspend + * image + */ +static struct linked_page *safe_pages_list; + +#ifdef CONFIG_HIGHMEM +/* struct highmem_pbe is used for creating the list of highmem pages that + * should be restored atomically during the resume from disk, because the page + * frames they have occupied before the suspend are in use. + */ +struct highmem_pbe { + struct page *copy_page; /* data is here now */ + struct page *orig_page; /* data was here before the suspend */ + struct highmem_pbe *next; +}; + +/* List of highmem PBEs needed for restoring the highmem pages that were + * allocated before the suspend and included in the suspend image, but have + * also been allocated by the "resume" kernel, so their contents cannot be + * written directly to their "original" page frames. + */ +static struct highmem_pbe *highmem_pblist; + +/** + * count_highmem_image_pages - compute the number of highmem pages in the + * suspend image. The bits in the memory bitmap @bm that correspond to the + * image pages are assumed to be set. + */ + +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) +{ + unsigned long pfn; + unsigned int cnt = 0; + + memory_bm_position_reset(bm); + pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (PageHighMem(pfn_to_page(pfn))) + cnt++; + + pfn = memory_bm_next_pfn(bm); + } + return cnt; +} + +/** + * prepare_highmem_image - try to allocate as many highmem pages as + * there are highmem image pages (@nr_highmem_p points to the variable + * containing the number of highmem image pages). The pages that are + * "safe" (ie. will not be overwritten when the suspend image is + * restored) have the corresponding bits set in @bm (it must be + * unitialized). + * + * NOTE: This function should not be called if there are no highmem + * image pages. + */ + +static unsigned int safe_highmem_pages; + +static struct memory_bitmap *safe_highmem_bm; + +static int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + unsigned int to_alloc; + + if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) + return -ENOMEM; + + if (get_highmem_buffer(PG_SAFE)) + return -ENOMEM; + + to_alloc = count_free_highmem_pages(); + if (to_alloc > *nr_highmem_p) + to_alloc = *nr_highmem_p; + else + *nr_highmem_p = to_alloc; + + safe_highmem_pages = 0; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_page(__GFP_HIGHMEM); + if (!swsusp_page_is_free(page)) { + /* The page is "safe", set its bit the bitmap */ + memory_bm_set_bit(bm, page_to_pfn(page)); + safe_highmem_pages++; + } + /* Mark the page as allocated */ + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); + } + memory_bm_position_reset(bm); + safe_highmem_bm = bm; + return 0; +} + +/** + * get_highmem_page_buffer - for given highmem image page find the buffer + * that suspend_write_next() should set for its caller to write to. + * + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem page is set to the page to which + * the data will have to be copied from @buffer. + */ + +static struct page *last_highmem_page; + +static void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + struct highmem_pbe *pbe; + void *kaddr; + + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + last_highmem_page = page; + return buffer; + } + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. + */ + pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); + if (!pbe) { + swsusp_free(); + return ERR_PTR(-ENOMEM); + } + pbe->orig_page = page; + if (safe_highmem_pages > 0) { + struct page *tmp; + + /* Copy of the page will be stored in high memory */ + kaddr = buffer; + tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); + safe_highmem_pages--; + last_highmem_page = tmp; + pbe->copy_page = tmp; + } else { + /* Copy of the page will be stored in normal memory */ + kaddr = safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->copy_page = virt_to_page(kaddr); + } + pbe->next = highmem_pblist; + highmem_pblist = pbe; + return kaddr; +} + +/** + * copy_last_highmem_page - copy the contents of a highmem image from + * @buffer, where the caller of snapshot_write_next() has place them, + * to the right location represented by @last_highmem_page . + */ + +static void copy_last_highmem_page(void) +{ + if (last_highmem_page) { + void *dst; + + dst = kmap_atomic(last_highmem_page); + copy_page(dst, buffer); + kunmap_atomic(dst); + last_highmem_page = NULL; + } +} + +static inline int last_highmem_page_copied(void) +{ + return !last_highmem_page; +} + +static inline void free_highmem_data(void) +{ + if (safe_highmem_bm) + memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); + + if (buffer) + free_image_page(buffer, PG_UNSAFE_CLEAR); +} +#else +static inline int get_safe_write_buffer(void) { return 0; } + +static unsigned int +count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } + +static inline int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + return 0; +} + +static inline void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + return ERR_PTR(-EINVAL); +} + +static inline void copy_last_highmem_page(void) {} +static inline int last_highmem_page_copied(void) { return 1; } +static inline void free_highmem_data(void) {} +#endif /* CONFIG_HIGHMEM */ + +/** + * prepare_image - use the memory bitmap @bm to mark the pages that will + * be overwritten in the process of restoring the system memory state + * from the suspend image ("unsafe" pages) and allocate memory for the + * image. + * + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for the image data, but not to assign these + * pages to specific tasks initially. Instead, we just mark them as + * allocated and create a lists of "safe" pages that will be used + * later. On systems with high memory a list of "safe" highmem pages is + * also created. + */ + +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) + +static int +prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) +{ + unsigned int nr_pages, nr_highmem; + struct linked_page *sp_list, *lp; + int error; + + /* If there is no highmem, the buffer will not be necessary */ + free_image_page(buffer, PG_UNSAFE_CLEAR); + buffer = NULL; + + nr_highmem = count_highmem_image_pages(bm); + error = mark_unsafe_pages(bm); + if (error) + goto Free; + + error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(new_bm, bm); + memory_bm_free(bm, PG_UNSAFE_KEEP); + if (nr_highmem > 0) { + error = prepare_highmem_image(bm, &nr_highmem); + if (error) + goto Free; + } + /* Reserve some safe pages for potential later use. + * + * NOTE: This way we make sure there will be enough safe pages for the + * chain_alloc() in get_buffer(). It is a bit wasteful, but + * nr_copy_pages cannot be greater than 50% of the memory anyway. + */ + sp_list = NULL; + /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); + while (nr_pages > 0) { + lp = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!lp) { + error = -ENOMEM; + goto Free; + } + lp->next = sp_list; + sp_list = lp; + nr_pages--; + } + /* Preallocate memory for the image */ + safe_pages_list = NULL; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; + while (nr_pages > 0) { + lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); + if (!lp) { + error = -ENOMEM; + goto Free; + } + if (!swsusp_page_is_free(virt_to_page(lp))) { + /* The page is "safe", add it to the list */ + lp->next = safe_pages_list; + safe_pages_list = lp; + } + /* Mark the page as allocated */ + swsusp_set_page_forbidden(virt_to_page(lp)); + swsusp_set_page_free(virt_to_page(lp)); + nr_pages--; + } + /* Free the reserved safe pages so that chain_alloc() can use them */ + while (sp_list) { + lp = sp_list->next; + free_image_page(sp_list, PG_UNSAFE_CLEAR); + sp_list = lp; + } + return 0; + + Free: + swsusp_free(); + return error; +} + +/** + * get_buffer - compute the address that snapshot_write_next() should + * set for its caller to write to. + */ +#ifdef _PRINT_PAGE_CRC_ + int is_original_addr=0;//flag to indicate if buffer address is the origianl or not. +#endif + +static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) +{ + struct pbe *pbe; + struct page *page; + unsigned long pfn = memory_bm_next_pfn(bm); + + if (pfn == BM_END_OF_MAP) + return ERR_PTR(-EFAULT); + + page = pfn_to_page(pfn); + if (PageHighMem(page)) + return get_highmem_page_buffer(page, ca); + +#ifdef _PRINT_PAGE_CRC_ + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) + { /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + + is_original_addr=1; + + return page_address(page); + } +#else + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + return page_address(page); +#endif + + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. + */ + #ifdef _PRINT_PAGE_CRC_ + is_original_addr = 0; + #endif + pbe = chain_alloc(ca, sizeof(struct pbe)); + if (!pbe) { + swsusp_free(); + return ERR_PTR(-ENOMEM); + } + pbe->orig_address = page_address(page); + pbe->address = safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->next = restore_pblist; + restore_pblist = pbe; + return pbe->address; +} + +/** + * snapshot_write_next - used for writing the system memory snapshot. + * + * On the first call to it @handle should point to a zeroed + * snapshot_handle structure. The structure gets updated and a pointer + * to it should be passed to this function every next time. + * + * On success the function returns a positive number. Then, the caller + * is allowed to write up to the returned number of bytes to the memory + * location computed by the data_of() macro. + * + * The function returns 0 to indicate the "end of file" condition, + * and a negative number is returned on error. In such cases the + * structure pointed to by @handle is not updated and should not be used + * any more. + */ + +int snapshot_write_next(struct snapshot_handle *handle) +{ + static struct chain_allocator ca; + int error = 0; + + /* Check if we have already loaded the entire image */ + if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) + return 0; + + handle->sync_read = 1; + + if (!handle->cur) { + if (!buffer) + /* This makes the buffer be freed by swsusp_free() */ + buffer = get_image_page(GFP_ATOMIC, PG_ANY); + + if (!buffer) + return -ENOMEM; + + handle->buffer = buffer; + } else if (handle->cur == 1) { + error = load_header(buffer); + if (error) + return error; + + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); + if (error) + return error; + + /* Allocate buffer for page keys. */ + error = page_key_alloc(nr_copy_pages); + if (error) + return error; + + } else if (handle->cur <= nr_meta_pages + 1) { + error = unpack_orig_pfns(buffer, ©_bm); + if (error) + return error; + + if (handle->cur == nr_meta_pages + 1) { + error = prepare_image(&orig_bm, ©_bm); + if (error) + return error; + + chain_init(&ca, GFP_ATOMIC, PG_SAFE); + memory_bm_position_reset(&orig_bm); + restore_pblist = NULL; + handle->buffer = get_buffer(&orig_bm, &ca); + handle->sync_read = 0; + if (IS_ERR(handle->buffer)) + return PTR_ERR(handle->buffer); + } + } else { + copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); + handle->buffer = get_buffer(&orig_bm, &ca); + if (IS_ERR(handle->buffer)) + return PTR_ERR(handle->buffer); + if (handle->buffer != buffer) + handle->sync_read = 0; + } + handle->cur++; + return PAGE_SIZE; +} + +/** + * snapshot_write_finalize - must be called after the last call to + * snapshot_write_next() in case the last page in the image happens + * to be a highmem page and its contents should be stored in the + * highmem. Additionally, it releases the memory that will not be + * used any more. + */ + +void snapshot_write_finalize(struct snapshot_handle *handle) +{ + copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); + page_key_free(); + /* Free only if we have loaded the image entirely */ + if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + free_highmem_data(); + } +} + +int snapshot_image_loaded(struct snapshot_handle *handle) +{ + return !(!nr_copy_pages || !last_highmem_page_copied() || + handle->cur <= nr_meta_pages + nr_copy_pages); +} + +#ifdef CONFIG_HIGHMEM +/* Assumes that @buf is ready and points to a "safe" page */ +static inline void +swap_two_pages_data(struct page *p1, struct page *p2, void *buf) +{ + void *kaddr1, *kaddr2; + + kaddr1 = kmap_atomic(p1); + kaddr2 = kmap_atomic(p2); + copy_page(buf, kaddr1); + copy_page(kaddr1, kaddr2); + copy_page(kaddr2, buf); + kunmap_atomic(kaddr2); + kunmap_atomic(kaddr1); +} + +/** + * restore_highmem - for each highmem page that was allocated before + * the suspend and included in the suspend image, and also has been + * allocated by the "resume" kernel swap its current (ie. "before + * resume") contents with the previous (ie. "before suspend") one. + * + * If the resume eventually fails, we can call this function once + * again and restore the "before resume" highmem state. + */ + +int restore_highmem(void) +{ + struct highmem_pbe *pbe = highmem_pblist; + void *buf; + + if (!pbe) + return 0; + + buf = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!buf) + return -ENOMEM; + + while (pbe) { + swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); + pbe = pbe->next; + } + free_image_page(buf, PG_UNSAFE_CLEAR); + return 0; +} +#endif /* CONFIG_HIGHMEM */ diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c new file mode 100644 index 00000000..c2868cb7 --- /dev/null +++ b/kernel/power/suspend.c @@ -0,0 +1,437 @@ +/* + * kernel/power/suspend.c - Suspend to RAM and standby functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/syscalls.h> +#include <linux/gfp.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/suspend.h> +#include <linux/syscore_ops.h> +#include <linux/rtc.h> +#include <trace/events/power.h> +#include <linux/cpufreq.h> + +#include <mach/hardware.h> +#include <mach/wmt_secure.h> + +#include "power.h" + +unsigned int pm_flags; +EXPORT_SYMBOL(pm_flags); + +#ifdef CONFIG_KEYBOARD_WMT +extern void wmt_kpad_int_ctrl(int state); +#endif + +const char *const pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", +}; + +extern unsigned int WMT_WAKE_UP_EVENT; + +static const struct platform_suspend_ops *suspend_ops; + +/** + * suspend_set_ops - Set the global suspend method table. + * @ops: Suspend operations to use. + */ +void suspend_set_ops(const struct platform_suspend_ops *ops) +{ + lock_system_sleep(); + suspend_ops = ops; + unlock_system_sleep(); +} +EXPORT_SYMBOL_GPL(suspend_set_ops); + +bool valid_state(suspend_state_t state) +{ + /* + * All states need lowlevel support and need to be valid to the lowlevel + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + +/** + * suspend_valid_only_mem - Generic memory-only valid callback. + * + * Platform drivers that implement mem suspend only and only need to check for + * that in their .valid() callback can use this instead of rolling their own + * .valid() callback. + */ +int suspend_valid_only_mem(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} +EXPORT_SYMBOL_GPL(suspend_valid_only_mem); + +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + +/** + * suspend_prepare - Prepare for entering system sleep state. + * + * Common code run for every system sleep state that can be entered (except for + * hibernation). Run suspend notifiers, allocate the "suspend" console and + * freeze processes. + */ +static int suspend_prepare(void) +{ + int error; + + if (!suspend_ops || !suspend_ops->enter) + return -EPERM; + + pm_prepare_console(); + + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + + error = suspend_freeze_processes(); + if (!error) + return 0; + + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); + return error; +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +{ + local_irq_disable(); +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +{ + local_irq_enable(); +} + +extern unsigned int cpu_trustzone_enabled; + +/** + * suspend_enter - Make the system enter the given sleep state. + * @state: System sleep state to enter. + * @wakeup: Returns information that the sleep state should not be re-entered. + * + * This function should be called after devices have been suspended. + */ +static int suspend_enter(suspend_state_t state, bool *wakeup) +{ + int error; + + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + goto Platform_finish; + } + + error = dpm_suspend_end(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down\n"); + goto Platform_finish; + } + + + if (suspend_ops->prepare_late) { + error = suspend_ops->prepare_late(); + if (error) + goto Platform_wake; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platform_wake; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + + /*Secure OS FIQ disable*/ + if (cpu_trustzone_enabled == 1) { + wmt_smc(WMT_SMC_CMD_SECURE_GIC_CTL, GIC_DISABLE); + } + + error = syscore_suspend(); + + /*Secure OS GIC suspend*/ + if (cpu_trustzone_enabled == 1) { + wmt_smc(WMT_SMC_CMD_SECURE_GIC_CTL, GIC_SUSPEND); + } + + WMT_WAKE_UP_EVENT = 0; + if (!error) { + + unsigned int pmc_temp; + pmc_temp = PMWS_VAL; + pmc_temp &= (WK_TRG_EN_VAL | 0x4000); + if (!pmc_temp) { + *wakeup = pm_wakeup_pending(); + if (!(suspend_test(TEST_CORE) || *wakeup)) { + error = suspend_ops->enter(state); + events_check_enabled = false; + } + } else { + WMT_WAKE_UP_EVENT = (PMWS_VAL & (WK_TRG_EN_VAL | 0x4000));//wmt_pm_enter + } + + /*Secure OS GIC resume*/ + if (cpu_trustzone_enabled == 1) { + wmt_smc(WMT_SMC_CMD_SECURE_GIC_CTL, GIC_RESUME); + } + + + syscore_resume(); + } + /*Disable Secure OS FIQ*/ + if (cpu_trustzone_enabled == 1) { + wmt_smc(WMT_SMC_CMD_SECURE_GIC_CTL, GIC_ENABLE); + + } + + arch_suspend_enable_irqs(); + BUG_ON(irqs_disabled()); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_wake: + if (suspend_ops->wake) + suspend_ops->wake(); + + dpm_resume_start(PMSG_RESUME); + + Platform_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + return error; +} + +/** + * suspend_devices_and_enter - Suspend devices and enter system sleep state. + * @state: System sleep state to enter. + */ + +extern int wmt_trigger_resume_kpad; +extern int wmt_trigger_resume_notify; +extern void wmt_resume_kpad(void); +extern void wmt_resume_notify(void); + +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + bool wakeup = false; + + if (!suspend_ops) + return -ENOSYS; + + trace_machine_suspend(state); + + if (suspend_ops->begin) { + error = suspend_ops->begin(state); + if (error) + goto Close; + } + /* + suspend_console(); + */ + suspend_test_start(); + error = dpm_suspend_start(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to suspend\n"); + goto Recover_platform; + } + suspend_test_finish("suspend devices"); + if (suspend_test(TEST_DEVICES)) + goto Recover_platform; + + do { + error = suspend_enter(state, &wakeup); + } while (!error && !wakeup + && suspend_ops->suspend_again && suspend_ops->suspend_again()); + + Resume_devices: + + if (wmt_trigger_resume_kpad){ + wmt_trigger_resume_kpad=0; + wmt_resume_kpad(); + } + if (wmt_trigger_resume_notify){ + wmt_trigger_resume_notify=0; + wmt_resume_notify(); + } + + + suspend_test_start(); + dpm_resume_end(PMSG_RESUME); + suspend_test_finish("resume devices"); + resume_console(); + printk("WMT wake up event %x\n",WMT_WAKE_UP_EVENT); + WMT_WAKE_UP_EVENT = 0; + Close: + if (suspend_ops->end) + suspend_ops->end(); + + trace_machine_suspend(PWR_EVENT_EXIT); + return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; +} + +/** + * suspend_finish - Clean up before finishing the suspend sequence. + * + * Call platform code to clean up, restart processes, and free the console that + * we've allocated. This routine is not called for hibernation. + */ +static void suspend_finish(void) +{ + suspend_thaw_processes(); + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); +} + +static int run_suspend(void) +{ + int ret; + char *argv[] = { "/system/etc/wmt/pm.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "ACTION=suspend", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +static int run_resume(void) +{ + int ret; + char *argv[] = { "/system/etc/wmt/pm.sh", "", NULL }; + char *envp[] = + { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", "ACTION=resume", NULL }; + + ret = call_usermodehelper(argv[0], argv, envp, 1); + return ret; +} + +/** + * enter_state - Do common work needed to enter system sleep state. + * @state: System sleep state to enter. + * + * Make sure that no one else is trying to put the system into a sleep state. + * Fail if that's not the case. Otherwise, prepare for system suspend, make the + * system enter the given sleep state and clean up after wakeup. + */ +static int enter_state(suspend_state_t state) +{ + int error; + + if (!valid_state(state)) + return -ENODEV; + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + run_suspend(); + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + error = suspend_prepare(); + if (error) + goto Unlock; + + if (suspend_test(TEST_FREEZER)) + goto Finish; + + pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pm_restrict_gfp_mask(); + error = suspend_devices_and_enter(state); + pm_restore_gfp_mask(); + + Finish: + pr_debug("PM: Finishing wakeup.\n"); + suspend_finish(); + run_resume(); + Unlock: + mutex_unlock(&pm_mutex); + return error; +} + +static void pm_suspend_marker(char *annotation) +{ + struct timespec ts; + struct rtc_time tm; + + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + printk(KERN_ERR"PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n", + annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); +} + +/** + * pm_suspend - Externally visible function for suspending the system. + * @state: System sleep state to enter. + * + * Check if the value of @state represents one of the supported states, + * execute enter_state() and update system suspend statistics. + */ +int pm_suspend(suspend_state_t state) +{ + int error; + + if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) + return -EINVAL; + + pm_suspend_marker("entry"); + error = enter_state(state); + if (error) { + suspend_stats.fail++; + dpm_save_failed_errno(error); + } else { + suspend_stats.success++; + } + pm_suspend_marker("exit"); + return error; +} +EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c new file mode 100644 index 00000000..25596e45 --- /dev/null +++ b/kernel/power/suspend_test.c @@ -0,0 +1,188 @@ +/* + * kernel/power/suspend_test.c - Suspend to RAM and standby test facility. + * + * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz> + * + * This file is released under the GPLv2. + */ + +#include <linux/init.h> +#include <linux/rtc.h> + +#include "power.h" + +/* + * We test the system suspend code by setting an RTC wakealarm a short + * time in the future, then suspending. Suspending the devices won't + * normally take long ... some systems only need a few milliseconds. + * + * The time it takes is system-specific though, so when we test this + * during system bootup we allow a LOT of time. + */ +#define TEST_SUSPEND_SECONDS 10 + +static unsigned long suspend_test_start_time; + +void suspend_test_start(void) +{ + /* FIXME Use better timebase than "jiffies", ideally a clocksource. + * What we want is a hardware counter that will work correctly even + * during the irqs-are-off stages of the suspend/resume cycle... + */ + suspend_test_start_time = jiffies; +} + +void suspend_test_finish(const char *label) +{ + long nj = jiffies - suspend_test_start_time; + unsigned msec; + + msec = jiffies_to_msecs(abs(nj)); + pr_info("PM: %s took %d.%03d seconds\n", label, + msec / 1000, msec % 1000); + + /* Warning on suspend means the RTC alarm period needs to be + * larger -- the system was sooo slooowwww to suspend that the + * alarm (should have) fired before the system went to sleep! + * + * Warning on either suspend or resume also means the system + * has some performance issues. The stack dump of a WARN_ON + * is more likely to get the right attention than a printk... + */ + WARN(msec > (TEST_SUSPEND_SECONDS * 1000), + "Component: %s, time: %u\n", label, msec); +} + +/* + * To test system suspend, we need a hands-off mechanism to resume the + * system. RTCs wake alarms are a common self-contained mechanism. + */ + +static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) +{ + static char err_readtime[] __initdata = + KERN_ERR "PM: can't read %s time, err %d\n"; + static char err_wakealarm [] __initdata = + KERN_ERR "PM: can't set %s wakealarm, err %d\n"; + static char err_suspend[] __initdata = + KERN_ERR "PM: suspend test failed, error %d\n"; + static char info_test[] __initdata = + KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; + + unsigned long now; + struct rtc_wkalrm alm; + int status; + + /* this may fail if the RTC hasn't been initialized */ + status = rtc_read_time(rtc, &alm.time); + if (status < 0) { + printk(err_readtime, dev_name(&rtc->dev), status); + return; + } + rtc_tm_to_time(&alm.time, &now); + + memset(&alm, 0, sizeof alm); + rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); + alm.enabled = true; + + status = rtc_set_alarm(rtc, &alm); + if (status < 0) { + printk(err_wakealarm, dev_name(&rtc->dev), status); + return; + } + + if (state == PM_SUSPEND_MEM) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + if (status == -ENODEV) + state = PM_SUSPEND_STANDBY; + } + if (state == PM_SUSPEND_STANDBY) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + } + if (status < 0) + printk(err_suspend, status); + + /* Some platforms can't detect that the alarm triggered the + * wakeup, or (accordingly) disable it after it afterwards. + * It's supposed to give oneshot behavior; cope. + */ + alm.enabled = false; + rtc_set_alarm(rtc, &alm); +} + +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/* + * Kernel options like "test_suspend=mem" force suspend/resume sanity tests + * at startup time. They're normally disabled, for faster boot and because + * we can't know which states really work on this particular system. + */ +static suspend_state_t test_state __initdata = PM_SUSPEND_ON; + +static char warn_bad_state[] __initdata = + KERN_WARNING "PM: can't test '%s' suspend state\n"; + +static int __init setup_test_suspend(char *value) +{ + unsigned i; + + /* "=mem" ==> "mem" */ + value++; + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (!pm_states[i]) + continue; + if (strcmp(pm_states[i], value) != 0) + continue; + test_state = (__force suspend_state_t) i; + return 0; + } + printk(warn_bad_state, value); + return 0; +} +__setup("test_suspend", setup_test_suspend); + +static int __init test_suspend(void) +{ + static char warn_no_rtc[] __initdata = + KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; + + char *pony = NULL; + struct rtc_device *rtc = NULL; + + /* PM is initialized by now; is that state testable? */ + if (test_state == PM_SUSPEND_ON) + goto done; + if (!valid_state(test_state)) { + printk(warn_bad_state, pm_states[test_state]); + goto done; + } + + /* RTCs have initialized by now too ... can we use one? */ + class_find_device(rtc_class, NULL, &pony, has_wakealarm); + if (pony) + rtc = rtc_class_open(pony); + if (!rtc) { + printk(warn_no_rtc); + goto done; + } + + /* go for it */ + test_wakealarm(rtc, test_state); + rtc_class_close(rtc); +done: + return 0; +} +late_initcall(test_suspend); diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c new file mode 100644 index 00000000..d2a65da9 --- /dev/null +++ b/kernel/power/suspend_time.c @@ -0,0 +1,111 @@ +/* + * debugfs file to track time spent in suspend + * + * Copyright (c) 2011, Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/syscore_ops.h> +#include <linux/time.h> + +static struct timespec suspend_time_before; +static unsigned int time_in_suspend_bins[32]; + +#ifdef CONFIG_DEBUG_FS +static int suspend_time_debug_show(struct seq_file *s, void *data) +{ + int bin; + seq_printf(s, "time (secs) count\n"); + seq_printf(s, "------------------\n"); + for (bin = 0; bin < 32; bin++) { + if (time_in_suspend_bins[bin] == 0) + continue; + seq_printf(s, "%4d - %4d %4u\n", + bin ? 1 << (bin - 1) : 0, 1 << bin, + time_in_suspend_bins[bin]); + } + return 0; +} + +static int suspend_time_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_time_debug_show, NULL); +} + +static const struct file_operations suspend_time_debug_fops = { + .open = suspend_time_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init suspend_time_debug_init(void) +{ + struct dentry *d; + + d = debugfs_create_file("suspend_time", 0755, NULL, NULL, + &suspend_time_debug_fops); + if (!d) { + pr_err("Failed to create suspend_time debug file\n"); + return -ENOMEM; + } + + return 0; +} + +late_initcall(suspend_time_debug_init); +#endif + +static int suspend_time_syscore_suspend(void) +{ + read_persistent_clock(&suspend_time_before); + + return 0; +} + +static void suspend_time_syscore_resume(void) +{ + struct timespec after; + + read_persistent_clock(&after); + + after = timespec_sub(after, suspend_time_before); + + time_in_suspend_bins[fls(after.tv_sec)]++; + + pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec, + after.tv_nsec / NSEC_PER_MSEC); +} + +static struct syscore_ops suspend_time_syscore_ops = { + .suspend = suspend_time_syscore_suspend, + .resume = suspend_time_syscore_resume, +}; + +static int suspend_time_syscore_init(void) +{ + register_syscore_ops(&suspend_time_syscore_ops); + + return 0; +} + +static void suspend_time_syscore_exit(void) +{ + unregister_syscore_ops(&suspend_time_syscore_ops); +} +module_init(suspend_time_syscore_init); +module_exit(suspend_time_syscore_exit); diff --git a/kernel/power/swap.c b/kernel/power/swap.c new file mode 100644 index 00000000..eb783023 --- /dev/null +++ b/kernel/power/swap.c @@ -0,0 +1,2064 @@ +/* + * linux/kernel/power/swap.c + * + * This file provides functions for reading the suspend image from + * and writing it to a swap partition. + * + * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/module.h> +#include <linux/file.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/genhd.h> +#include <linux/device.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pm.h> +#include <linux/slab.h> +#include <linux/lzo.h> +#include <linux/lz4.h> +#include <linux/vmalloc.h> +#include <linux/cpumask.h> +#include <linux/atomic.h> +#include <linux/kthread.h> +#include <linux/crc32.h> + +#include "power.h" +#include <mach/hardware.h> + +#define HIBERNATE_SIG "S1SUSPEND" +#define _DISABLE_CRC_THREAD_ 1 /*disable crc check for better performance*/ +/*#undef _DISABLE_CRC_THREAD_ */ + +#define COMPRESS_MODE_LZ4 1 /*change compress mode from lzo to lz4 */ +extern int WMT_WAKE_UP_EVENT; + +/* + * The swap map is a data structure used for keeping track of each page + * written to a swap partition. It consists of many swap_map_page + * structures that contain each an array of MAP_PAGE_ENTRIES swap entries. + * These structures are stored on the swap and linked together with the + * help of the .next_swap member. + * + * The swap map is created during suspend. The swap map pages are + * allocated and populated one at a time, so we only need one memory + * page to set up the entire structure. + * + * During resume we pick up all swap_map_page structures into a list. + */ + +#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) + +/* + * Number of free pages that are not high. + */ +static inline unsigned long low_free_pages(void) +{ + return nr_free_pages() - nr_free_highpages(); +} + +/* + * Number of pages required to be kept free while writing the image. Always + * half of all available low pages before the writing starts. + */ +static inline unsigned long reqd_free_pages(void) +{ + return low_free_pages() / 2; +} + +struct swap_map_page { + sector_t entries[MAP_PAGE_ENTRIES]; + sector_t next_swap; +}; + +struct swap_map_page_list { + struct swap_map_page *map; + struct swap_map_page_list *next; +}; + +/** + * The swap_map_handle structure is used for handling swap in + * a file-alike way + */ + +struct swap_map_handle { + struct swap_map_page *cur; + struct swap_map_page_list *maps; + sector_t cur_swap; + sector_t first_sector; + unsigned int k; + unsigned long reqd_free_pages; + u32 crc32; +}; + +struct swsusp_header { + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - + sizeof(u32)]; + u32 crc32; + sector_t image; + unsigned int flags; /* Flags to pass to the "boot" kernel */ + char orig_sig[10]; + char sig[10]; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; + +/** + * The following functions are used for tracing the allocated + * swap pages, so that they can be freed in case of an error. + */ + +struct swsusp_extent { + struct rb_node node; + unsigned long start; + unsigned long end; +}; + +static struct rb_root swsusp_extents = RB_ROOT; + +static int swsusp_extents_insert(unsigned long swap_offset) +{ + struct rb_node **new = &(swsusp_extents.rb_node); + struct rb_node *parent = NULL; + struct swsusp_extent *ext; + + /* Figure out where to put the new node */ + while (*new) { + ext = container_of(*new, struct swsusp_extent, node); + parent = *new; + if (swap_offset < ext->start) { + /* Try to merge */ + if (swap_offset == ext->start - 1) { + ext->start--; + return 0; + } + new = &((*new)->rb_left); + } else if (swap_offset > ext->end) { + /* Try to merge */ + if (swap_offset == ext->end + 1) { + ext->end++; + return 0; + } + new = &((*new)->rb_right); + } else { + /* It already is in the tree */ + return -EINVAL; + } + } + /* Add the new node and rebalance the tree. */ + ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); + if (!ext) + return -ENOMEM; + + ext->start = swap_offset; + ext->end = swap_offset; + rb_link_node(&ext->node, parent, new); + rb_insert_color(&ext->node, &swsusp_extents); + return 0; +} + +/** + * alloc_swapdev_block - allocate a swap page and register that it has + * been allocated, so that it can be freed in case of an error. + */ + +sector_t alloc_swapdev_block(int swap) +{ + unsigned long offset; + + offset = swp_offset(get_swap_page_of_type(swap)); + if (offset) { + if (swsusp_extents_insert(offset)) + swap_free(swp_entry(swap, offset)); + else + return swapdev_block(swap, offset); + } + return 0; +} + +/** + * free_all_swap_pages - free swap pages allocated for saving image data. + * It also frees the extents used to register which swap entries had been + * allocated. + */ + +void free_all_swap_pages(int swap) +{ + struct rb_node *node; + + while ((node = swsusp_extents.rb_node)) { + struct swsusp_extent *ext; + unsigned long offset; + + ext = container_of(node, struct swsusp_extent, node); + rb_erase(node, &swsusp_extents); + for (offset = ext->start; offset <= ext->end; offset++) + swap_free(swp_entry(swap, offset)); + + kfree(ext); + } +} + +int swsusp_swap_in_use(void) +{ + return (swsusp_extents.rb_node != NULL); +} + +/* + * General things + */ + +static unsigned short root_swap = 0xffff; +struct block_device *hib_resume_bdev; + +/* + * Saving part + */ + +static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) +{ + int error; + + hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); + swsusp_header->image = handle->first_sector; + swsusp_header->flags = flags; + if (flags & SF_CRC32_MODE) + swsusp_header->crc32 = handle->crc32; + error = hib_bio_write_page(swsusp_resume_block, + swsusp_header, NULL); + } else { + printk(KERN_ERR "PM: Swap header not found!\n"); + error = -ENODEV; + } + return error; +} + +/** + * swsusp_swap_check - check if the resume device is a swap device + * and get its index (if so) + * + * This is called before saving image + */ +int swsusp_swap_check(void) +{ + int res; + + res = swap_type_of(swsusp_resume_device, swsusp_resume_block, + &hib_resume_bdev); + if (res < 0) + return res; + + root_swap = res; + res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); + if (res) + return res; + + res = set_blocksize(hib_resume_bdev, PAGE_SIZE); + if (res < 0) + blkdev_put(hib_resume_bdev, FMODE_WRITE); + + return res; +} + +/** + * write_page - Write one page to given swap location. + * @buf: Address we're writing. + * @offset: Offset of the swap page we're writing to. + * @bio_chain: Link the next write BIO here + */ + +static int write_page(void *buf, sector_t offset, struct bio **bio_chain) +{ + void *src; + int ret; + + if (!offset) + return -ENOSPC; + + if (bio_chain) { + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (src) { + copy_page(src, buf); + } else { + ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + if (ret) + return ret; + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); + if (src) { + copy_page(src, buf); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; + } + } + } else { + src = buf; + } + return hib_bio_write_page(offset, src, bio_chain); +} + +static void release_swap_writer(struct swap_map_handle *handle) +{ + if (handle->cur) + free_page((unsigned long)handle->cur); + handle->cur = NULL; +} + +static int get_swap_writer(struct swap_map_handle *handle) +{ + int ret; + + ret = swsusp_swap_check(); + if (ret) { + if (ret != -ENOSPC) + printk(KERN_ERR "PM: Cannot find swap device, try " + "swapon -a.\n"); + return ret; + } + handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); + if (!handle->cur) { + ret = -ENOMEM; + goto err_close; + } + handle->cur_swap = alloc_swapdev_block(root_swap); + if (!handle->cur_swap) { + ret = -ENOSPC; + goto err_rel; + } + handle->k = 0; + handle->reqd_free_pages = reqd_free_pages(); + handle->first_sector = handle->cur_swap; + return 0; +err_rel: + release_swap_writer(handle); +err_close: + swsusp_close(FMODE_WRITE); + return ret; +} + +static int swap_write_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + int error = 0; + sector_t offset; + + if (!handle->cur) + return -EINVAL; + offset = alloc_swapdev_block(root_swap); + error = write_page(buf, offset, bio_chain); + if (error) + return error; + handle->cur->entries[handle->k++] = offset; + if (handle->k >= MAP_PAGE_ENTRIES) { + offset = alloc_swapdev_block(root_swap); + if (!offset) + return -ENOSPC; + handle->cur->next_swap = offset; + error = write_page(handle->cur, handle->cur_swap, bio_chain); + if (error) + goto out; + clear_page(handle->cur); + handle->cur_swap = offset; + handle->k = 0; + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. + */ + handle->reqd_free_pages = reqd_free_pages(); + } + } + out: + return error; +} + +static int flush_swap_writer(struct swap_map_handle *handle) +{ + if (handle->cur && handle->cur_swap) + return write_page(handle->cur, handle->cur_swap, NULL); + else + return -EINVAL; +} + +static int swap_writer_finish(struct swap_map_handle *handle, + unsigned int flags, int error) +{ + if (!error) { + flush_swap_writer(handle); + printk(KERN_INFO "PM: S"); + error = mark_swapfiles(handle, flags); + /* + FIXME: + Image Mark S1SUSPEND is not saved to disk unless do flush_swap_writer again. + The root cause is unknown yet. + */ + flush_swap_writer(handle); + + printk("|\n"); + } + + if (error) + free_all_swap_pages(root_swap); + release_swap_writer(handle); + swsusp_close(FMODE_WRITE); + + return error; +} + +/* We need to remember how much compressed data we need to read. */ +#define LZO_HEADER sizeof(size_t) + +/* Number of pages/bytes we'll compress at one time. */ +#define LZO_UNC_PAGES 256 /*32:orginal.*/ +#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE) + +/* Number of pages/bytes we need for compressed data (worst case). */ +#ifdef COMPRESS_MODE_LZ4 + +#define LZO_CMP_PAGES DIV_ROUND_UP(lz4_worst_compress(LZO_UNC_SIZE) + \ + LZO_HEADER, PAGE_SIZE) + +#else + +#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \ + LZO_HEADER, PAGE_SIZE) +#endif + +#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) + +/* Maximum number of threads for compression/decompression. */ +#define LZO_THREADS 3 + +/* Buffer are divided into blocks, orginally only 1 block.*/ +#define LZO_BUF_BLOCK_NUM (2) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 /*org: 1024 */ +#define LZO_MAX_RD_PAGES 1024 /*original size :8192 */ + +/** + * save_image - save the suspend image data + */ + +static int save_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) +{ + unsigned int m; + int ret; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; + + printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ", + nr_to_write); + m = nr_to_write / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + while (1) { + ret = snapshot_read_next(snapshot); + if (ret <= 0) + break; + ret = swap_write_page(handle, data_of(*snapshot), &bio); + if (ret) + break; + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) + printk(KERN_CONT "\b\b\b\bdone\n"); + else + printk(KERN_CONT "\n"); + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); + return ret; +} + +/** + * Structure used for CRC32. + */ +struct crc_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + unsigned run_threads; /* nr current threads */ + wait_queue_head_t go; /* start crc update */ + wait_queue_head_t done; /* crc update done */ + u32 *crc32; /* points to handle's crc32 */ + size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */ + unsigned char *unc[LZO_THREADS]; /* uncompressed data */ +}; + +/** + * CRC32 update function that runs in its own thread. + */ +static int crc32_threadfn(void *data) +{ + struct crc_data *d = data; + unsigned i; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + for (i = 0; i < d->run_threads; i++) + *d->crc32 = crc32_le(*d->crc32, + d->unc[i], *d->unc_len[i]); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +/** + * Structure used for LZO data compression. + */ +struct cmp_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start compression */ + wait_queue_head_t done; /* compression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ + unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ +}; + +/** + * Compression function that runs in its own thread. + */ +#ifdef COMPRESS_MODE_LZ4 +static int lz4_compress_threadfn(void *data) +{ + struct cmp_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->ret = lz4_compress(d->unc, d->unc_len, + d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +#else +static int lzo_compress_threadfn(void *data) +{ + struct cmp_data *d = data; + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + d->ret = lzo1x_1_compress(d->unc, d->unc_len, + d->cmp + LZO_HEADER, &d->cmp_len, + d->wrk); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +#endif + +/** + * save_image_lzo - Save the suspend image data compressed with LZO. + * @handle: Swap mam handle to use for saving the image. + * @snapshot: Image to read data from. + * @nr_to_write: Number of pages to save. + */ +static int save_image_compress(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) +{ + unsigned int m; + int ret = 0; + int nr_pages; + int nr_pages_write=0;//record image size after compression. + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; + size_t off; + unsigned thr, run_threads, nr_threads; + unsigned char *page = NULL; + struct cmp_data *data = NULL; + struct crc_data *crc = NULL; + unsigned int pmc_temp; + + /* + * We'll limit the number of threads for compression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; /*original: num_online_cpus() - 1*/ + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; + goto out_clean; + } + + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; + } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct cmp_data, go)); + + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the compression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + +#ifdef COMPRESS_MODE_LZ4 + data[thr].thr = kthread_run(lz4_compress_threadfn, + &data[thr], + "image_compress/%u", thr); +#else + data[thr].thr = kthread_run(lzo_compress_threadfn, + &data[thr], + "image_compress/%u", thr); +#endif + + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start compression threads\n"); + ret = -ENOMEM; + goto out_clean; + } + printk(KERN_CRIT"%s:kthread:image_compress/%u:cpu:%d:pid:%d\n", __FUNCTION__, + thr, data[thr].thr->on_cpu, data[thr].thr->pid); + } +#ifndef _DISABLE_CRC_THREAD_ + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; + } + + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; + } + printk(KERN_CRIT"%s:kthread:image_crc32:cpu:%d:pid:%d\n", __FUNCTION__, + crc->thr->on_cpu, crc->thr->pid); +#endif + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + + printk(KERN_CRIT + "PM: Using %u thread(s) for compression.\n" + "PM: Compressing and saving image data (%u pages) ... ", + nr_threads, nr_to_write); + m = nr_to_write / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + for (;;) { + for (thr = 0; thr < nr_threads; thr++) { + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + ret = snapshot_read_next(snapshot); + if (ret < 0) + goto out_finish; + + if (!ret) + break; + + memcpy(data[thr].unc + off, + data_of(*snapshot), PAGE_SIZE); + + swsusp_free_page(data_of(*snapshot)); + + + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", + nr_pages / m); + nr_pages++; + } + if (!off) + break; + + data[thr].unc_len = off; + + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); + } + + if (!thr) + break; + if ((nr_pages / m)%5 == 0) { + WMT_WAKE_UP_EVENT = 0; + pmc_temp = PMWS_VAL; + pmc_temp &= (WK_TRG_EN_VAL | 0x4000); + WMT_WAKE_UP_EVENT = (PMWS_VAL & (WK_TRG_EN_VAL | 0x4000));//wmt_pm_enter + if (pmc_temp == 0x4000) { + ret = -STD_USER_ABORT; + goto out_finish; + } + } + #ifndef _DISABLE_CRC_THREAD_ + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); + #endif + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); + + ret = data[thr].ret; + + if (ret < 0) { + printk(KERN_ERR "PM: LZO compression failed\n"); + goto out_finish; + } + if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > +#ifdef COMPRESS_MODE_LZ4 + lz4_worst_compress(data[thr].unc_len) +#else + lzo1x_worst_compress(data[thr].unc_len) +#endif + )) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; + goto out_finish; + } + + *(size_t *)data[thr].cmp = data[thr].cmp_len; + + /* + * Given we are writing one page at a time to disk, we + * copy that much from the buffer, although the last + * bit will likely be smaller than full page. This is + * OK - we saved the length of the compressed data, so + * any garbage at the end will be discarded when we + * read it. + */ + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(page, data[thr].cmp + off, PAGE_SIZE); + + ret = swap_write_page(handle, page, &bio); + if (ret) + goto out_finish; + nr_pages_write++;//one page saved to disk, count++ + } + } + #ifndef _DISABLE_CRC_THREAD_ + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + #endif + } + +out_finish: + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) { + printk(KERN_CONT "\b\b\b\bdone\n"); + printk(KERN_CRIT "\n!!! Image After Compress: %lu MB\n", nr_pages_write*PAGE_SIZE>>20); + } else { + printk(KERN_CONT "\n"); + } + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); +out_clean: + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) free_page((unsigned long)page); + + return ret; +} + +/** + * enough_swap - Make sure we have enough swap to save the image. + * + * Returns TRUE or FALSE after checking the total amount of swap + * space avaiable from the resume partition. + */ + +static int enough_swap(unsigned int nr_pages, unsigned int flags) +{ + unsigned int free_swap = count_swap_pages(root_swap, 1); + unsigned int required; + + pr_debug("PM: Free swap pages: %u\n", free_swap); + + required = PAGES_FOR_IO + nr_pages; + return free_swap > required; +} + +/** + * swsusp_write - Write entire image and metadata. + * @flags: flags to pass to the "boot" kernel in the image header + * + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + +int swsusp_write(unsigned int flags) +{ + struct swap_map_handle handle; + struct snapshot_handle snapshot; + struct swsusp_info *header; + unsigned long pages; + int error; + + pages = snapshot_get_image_size(); + error = get_swap_writer(&handle); + if (error) { + printk(KERN_ERR "PM: Cannot get swap writer\n"); + return error; + } + if (flags & SF_NOCOMPRESS_MODE) { + if (!enough_swap(pages, flags)) { + printk(KERN_ERR "PM: Not enough free swap\n"); + error = -ENOSPC; + goto out_finish; + } + } + memset(&snapshot, 0, sizeof(struct snapshot_handle)); + error = snapshot_read_next(&snapshot); + if (error < PAGE_SIZE) { + if (error >= 0) + error = -EFAULT; + + goto out_finish; + } + header = (struct swsusp_info *)data_of(snapshot); + error = swap_write_page(&handle, header, NULL); + if (!error) { + error = (flags & SF_NOCOMPRESS_MODE) ? + save_image(&handle, &snapshot, pages - 1) : + save_image_compress(&handle, &snapshot, pages - 1); + } +out_finish: + error = swap_writer_finish(&handle, flags, error); + return error; +} + +/** + * The following functions allow us to read data using a swap map + * in a file-alike way + */ + +static void release_swap_reader(struct swap_map_handle *handle) +{ + struct swap_map_page_list *tmp; + + while (handle->maps) { + if (handle->maps->map) + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + } + handle->cur = NULL; +} + +static int get_swap_reader(struct swap_map_handle *handle, + unsigned int *flags_p) +{ + int error; + struct swap_map_page_list *tmp, *last; + sector_t offset; + + *flags_p = swsusp_header->flags; + + if (!swsusp_header->image) /* how can this happen? */ + return -EINVAL; + + handle->cur = NULL; + last = handle->maps = NULL; + offset = swsusp_header->image; + while (offset) { + tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL); + if (!tmp) { + release_swap_reader(handle); + return -ENOMEM; + } + memset(tmp, 0, sizeof(*tmp)); + if (!handle->maps) + handle->maps = tmp; + if (last) + last->next = tmp; + last = tmp; + + tmp->map = (struct swap_map_page *) + __get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!tmp->map) { + release_swap_reader(handle); + return -ENOMEM; + } + + error = hib_bio_read_page(offset, tmp->map, NULL); + if (error) { + release_swap_reader(handle); + return error; + } + offset = tmp->map->next_swap; + } + handle->k = 0; + handle->cur = handle->maps->map; + return 0; +} + +static int swap_read_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + sector_t offset; + int error; + struct swap_map_page_list *tmp; + + if (!handle->cur) + return -EINVAL; + offset = handle->cur->entries[handle->k]; + if (!offset) + return -EFAULT; + error = hib_bio_read_page(offset, buf, bio_chain); + if (error) + return error; + if (++handle->k >= MAP_PAGE_ENTRIES) { + handle->k = 0; + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + if (!handle->maps) + release_swap_reader(handle); + else + handle->cur = handle->maps->map; + } + return error; +} + +static int swap_reader_finish(struct swap_map_handle *handle) +{ + release_swap_reader(handle); + + return 0; +} + +/** + * load_image - load the image using the swap map handle + * @handle and the snapshot handle @snapshot + * (assume there are @nr_pages pages to load) + */ +#ifdef _PRINT_PAGE_CRC_ +extern +int is_original_addr;//flag to indicate if buffer address is the origianl or not. +#endif +static int load_image(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + unsigned int m; + int ret = 0; + struct timeval start; + struct timeval stop; + struct bio *bio; + int err2; + unsigned nr_pages; + #ifdef _PRINT_PAGE_CRC_//add by roger. + unsigned long crc_le; + unsigned char *virt_addr; + unsigned long pfn; + #endif + + printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ", + nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + for ( ; ; ) { + ret = snapshot_write_next(snapshot); + if (ret <= 0) + break; + #ifdef _PRINT_PAGE_CRC_ + // snapshot->sync_read = 1;//for page crc debug.. + #endif + if(snapshot->sync_read){/*test for improve read speed.*/ + //printk("\n!!! sync_read!!!\n"); + //snapshot->sync_read = 0; + } + ret = swap_read_page(handle, data_of(*snapshot), &bio); + if (ret) + break; + if (snapshot->sync_read) + ret = hib_wait_on_bio_chain(&bio); + if (ret) + break; +#ifdef _PRINT_PAGE_CRC_// add by roger. + if(is_original_addr){ + virt_addr = data_of(*snapshot); + pfn = page_to_pfn(virt_to_page(virt_addr)); + crc_le = crc32_le(0, virt_addr, PAGE_SIZE); + printk("pfn:%lu:phy_addr:0x%x:crc:%lu:virt_addr:0x%x\n", + pfn, pfn*PAGE_SIZE, crc_le, virt_addr); + } +#else + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); +#endif + + nr_pages++; + } + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + ret = -ENODATA; + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); + return ret; +} + +/** + * Structure used for LZO data decompression. + */ +struct dec_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start decompression */ + wait_queue_head_t done; /* decompression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ +}; + +/** + * Deompression function that runs in its own thread. + */ +#ifdef COMPRESS_MODE_LZ4 +static int lz4_decompress_threadfn(void *data) +{ + struct dec_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->unc_len = LZO_UNC_SIZE; + d->ret = + lz4_decompress_unknownoutputsize(d->cmp + LZO_HEADER, + d->cmp_len, d->unc, + &d->unc_len); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +#else +static int lzo_decompress_threadfn(void *data) +{ + struct dec_data *d = data; + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + d->unc_len = LZO_UNC_SIZE; + d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, + d->unc, &d->unc_len); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +#endif + +/** + * load_image_lzo - Load compressed image data and decompress them with LZO. + * @handle: Swap map handle to use for loading data. + * @snapshot: Image to copy uncompressed data into. + * @nr_to_read: Number of pages to load. + */ +static int load_image_compress(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + unsigned int m; + int ret = 0; + int eof = 0; + struct bio *bio; + struct timeval start; + struct timeval stop; + unsigned nr_pages; + size_t off; + unsigned i, thr, run_threads, nr_threads; + unsigned ring = 0, pg = 0, ring_size = 0, + have = 0, want, need, asked = 0; + unsigned long read_pages = 0; + unsigned char **page = NULL; + struct dec_data *data = NULL; + struct crc_data *crc = NULL; + #ifdef _PRINT_PAGE_CRC_ + unsigned long crc_le; + unsigned char *virt_addr; + unsigned long pfn; + #endif + static unsigned long out_of_data; + static unsigned long wait_on_bio; + /* + * We'll limit the number of threads for decompression to limit memory + * footprint. + */ + //nr_threads = num_online_cpus() - 1; + nr_threads = num_online_cpus();//test multi-thread. + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; + goto out_clean; + } + + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; + } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct dec_data, go)); + + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the decompression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + +#ifdef COMPRESS_MODE_LZ4 + data[thr].thr = kthread_run(lz4_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); +#else + data[thr].thr = kthread_run(lzo_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); +#endif + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start decompression threads\n"); + ret = -ENOMEM; + goto out_clean; + } + } +#ifndef _DISABLE_CRC_THREAD_ + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; + } + + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; + } +#endif + /* + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. + */ + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + printk("\n!!! read_pages:%lu:low_free_pages:%lu:image_size:%lu\n", + read_pages, low_free_pages(), snapshot_get_image_size()); + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); + + for (i = 0; i < read_pages; i++) { + page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + __GFP_WAIT | __GFP_HIGH : + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + + if (!page[i]) { + if (i < LZO_CMP_PAGES) { + ring_size = i; + printk(KERN_ERR + "PM: Failed to allocate LZO pages\n"); + ret = -ENOMEM; + goto out_clean; + } else { + break; + } + } + } + want = ring_size = i; + + printk(KERN_INFO + "PM: Using %u thread(s) for decompression.\n" + "PM: Loading and decompressing image data (%u pages) ... ", + nr_threads, nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + + ret = snapshot_write_next(snapshot); + if (ret <= 0) + goto out_finish; + + for(;;) { + for (i = 0; !eof && i < want; i++) { + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) { + goto out_finish; + } else { + eof = 1; + break; + } + } + if (++ring >= ring_size) + ring = 0; + } + asked += i; + want -= i; + + /* + * We are out of data, wait for some more. + */ + if (!have) { + if (!asked) + break; + out_of_data++; + ret = hib_wait_on_bio_chain(&bio); + if (ret) + goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; + } + #ifndef _DISABLE_CRC_THREAD_ + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + crc->run_threads = 0; + } + #endif + for (thr = 0; have && thr < nr_threads; thr++) { + data[thr].cmp_len = *(size_t *)page[pg]; + if (unlikely(!data[thr].cmp_len || data[thr].cmp_len > +#ifdef COMPRESS_MODE_LZ4 + lz4_worst_compress(LZO_UNC_SIZE) +#else + lzo1x_worst_compress(LZO_UNC_SIZE) +#endif + )) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; + goto out_finish; + } + + need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, + PAGE_SIZE); + if (need > have) { + if (eof > 1) { + ret = -1; + goto out_finish; + } + break; + } + + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(data[thr].cmp + off, + page[pg], PAGE_SIZE); + have--; + want++; + if (++pg >= ring_size) + pg = 0; + } + + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); + } + + /* + * Wait for more data while we are decompressing. + */ + if (have < LZO_CMP_PAGES && asked) { + wait_on_bio++; + ret = hib_wait_on_bio_chain(&bio); + if (ret) + goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; + } + + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); + + ret = data[thr].ret; + + if (ret < 0) { + printk(KERN_ERR + "PM: LZO decompression failed\n"); + goto out_finish; + } + + if (unlikely(!data[thr].unc_len || + data[thr].unc_len > LZO_UNC_SIZE || + data[thr].unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR + "PM: Invalid LZO uncompressed length\n"); + ret = -1; + goto out_finish; + } + + for (off = 0; + off < data[thr].unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), + data[thr].unc + off, PAGE_SIZE); + + #ifdef _PRINT_PAGE_CRC_// add by roger. + if(is_original_addr){ + virt_addr = data_of(*snapshot); + pfn = page_to_pfn(virt_to_page(virt_addr)); + crc_le = crc32_le(0, virt_addr, PAGE_SIZE); + printk("pfn:%lu:phy_addr:0x%x:crc:%lu:virt_addr:0x%x\n", + pfn, pfn*PAGE_SIZE, crc_le, virt_addr); + } + #else + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + #endif + nr_pages++; + + ret = snapshot_write_next(snapshot); + if (ret <= 0) { + #ifndef _DISABLE_CRC_THREAD_ + crc->run_threads = thr + 1; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); + #endif + goto out_finish; + } + } + } + #ifndef _DISABLE_CRC_THREAD_ + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); + #endif + } + +out_finish: +#ifndef _DISABLE_CRC_THREAD_ + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + } +#endif + do_gettimeofday(&stop); + if (!ret) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + ret = -ENODATA; + if (!ret) { + #ifndef _DISABLE_CRC_THREAD_ + if (swsusp_header->flags & SF_CRC32_MODE) { + if(handle->crc32 != swsusp_header->crc32) { + printk(KERN_ERR + "PM: Invalid image CRC32!\n"); + ret = -ENODATA; + } + } + #endif + } + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); +#if 0 + printk("\nout_of_data:%lu:wait_on_bio:%lu\n", + out_of_data, wait_on_bio); +#endif +out_clean: + for (i = 0; i < ring_size; i++) + free_page((unsigned long)page[i]); + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) vfree(page); + + return ret; +} + +/* + based on load_image_lzo + no crc thread + use dual buffer to make IO / Lzo more parallize + +*/ +/* + buffer status definition: + u8 stat: + 7 ...4 : 3.. 0 + buffer 0 : buffer 1 + stat value : + 0 --> empty (or only submit bio request) + 1 --> filled full (after wait_on_io return) + 2 --> almost empty (not enough for filling lzo thread input buffer) + for example: 0x00 means buffer 0 and buffer 1 are both empty. + + state machine: +change state condition | current state | what is going to do + init 0x00 request to fill buf 0, wait on io for it to finish. + buf 0 full 0x10 run lzo thread on buf 0, request to fill buf 1(if not requested before) + buf 0 almost empty 0x20 wait on io for buf 1 to finish. + buf 1 full 0x21 run lzo thread, which will consume all data in buf 0 + buf 0 empty 0x01 run lzo thread on buf 1, request to fill buf 0 (if not requested before) + buf 1 almost empty 0x02 wait on io for buf 0 to finish + buf 0 full 0x12 run lzo thread, which will consume all data in buf 1 + buf 1 empty 0x10 go back to previous. + +*/ +typedef enum { + BOTH_EMPTY = 0, /* 0x00 */ + ONE_FULL, /* 0x10 or 0x01 */ + ONE_ALMOST_EMPTY, /* 0x20 or 0x02 */ + ANOTHER_FULL /* 0x21 or 0x12 */ +}buf_state; + +static int load_image_compress_dualbuf(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + + unsigned int m; + int ret = 0; + int eof = 0; + struct bio *bio; + struct timeval start; + struct timeval stop; + unsigned nr_pages; + size_t off; + unsigned i, thr, run_threads, nr_threads; + unsigned ring = 0, pg = 0, ring_size = 0, + have = 0, want, need, asked = 0; + unsigned long read_pages = 0; + unsigned char **page = NULL; + struct dec_data *data = NULL; + + buf_state buffer_stat = BOTH_EMPTY; + int reminder = 0; + size_t next_cmp_len = 0; + + /* + * We'll limit the number of threads for decompression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; + goto out_clean; + } + + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; + } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct dec_data, go)); + + /* + * Start the decompression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + +#ifdef COMPRESS_MODE_LZ4 + data[thr].thr = kthread_run(lz4_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); +#else + data[thr].thr = kthread_run(lzo_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); +#endif + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start decompression threads\n"); + ret = -ENOMEM; + goto out_clean; + } + } + + /* + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. + */ + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + //printk("\n!!! read_pages:%lu:low_free_pages:%lu:image_size:%lu\n", + // read_pages, low_free_pages(), snapshot_get_image_size()); + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); + //printk("\n actual read_pages = %lu\n", read_pages); + + for (i = 0; i < read_pages; i++) { + page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + __GFP_WAIT | __GFP_HIGH : + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + + if (!page[i]) {/*get free page failed.*/ + if (i < LZO_CMP_PAGES) { + ring_size = i; + printk(KERN_ERR + "PM: Failed to allocate LZO pages\n"); + ret = -ENOMEM; + goto out_clean; + } else { + break; + } + } + } + + reminder = i % LZO_BUF_BLOCK_NUM; + if(reminder) + i = i - reminder; + ring_size = i; /*want = ring_size <= read_pages*/ + want = ring_size / LZO_BUF_BLOCK_NUM; + + printk(KERN_CRIT"allocate %u pages as Load Ring buffer\n", ring_size); + printk(KERN_INFO + "PM: Using %u thread(s) for decompression.\n" + "PM: Loading and decompressing image data (%u pages) ... ", + nr_threads, nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + + do_gettimeofday(&start); + + ret = snapshot_write_next(snapshot); + if (ret <= 0) + goto out_finish; + + buffer_stat = BOTH_EMPTY; +/*request to fill buf 0, wait on io for it to finish.*/ + + for (i = 0; !eof && i < want; i++) { + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) {/* has unloaded pages.*/ + goto out_finish; + } else {/*EOF*/ + eof = 1; /*still need to wait on io later.*/ + break; + } + } + /*ret = 0*/ + if (++ring >= ring_size) /*ring buffer reach end*/ + ring = 0; + } + asked += i; + + ret = hib_wait_on_bio_chain(&bio);/* make sure read io finished */ + + if (ret) + goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; /* already return from wait on io. */ + + buffer_stat = ONE_FULL; + + /* send resquest for next buffer immediately */ + for (i = 0; !eof && i < want; i++) { /*load num = want lzo pages from disk*/ + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) {/* has unloaded pages.*/ + goto out_finish; + } else {/*EOF*/ + eof = 1; + break; + } + } + /*ret = 0*/ + if (++ring >= ring_size) /*ring buffer full*/ + ring = 0; + } + asked += i; + +for(;;){ + +loop_start: + /*run lzo thread on buf 0 */ + for (thr = 0; have && thr < nr_threads; thr++) { + data[thr].cmp_len = *(size_t *) page[pg]; /* Read LZO_HEADER */ +// printk("data[%d].cpm_len = %u\n", thr, data[thr].cmp_len); + + if (unlikely(!data[thr].cmp_len || + data[thr].cmp_len > + #ifdef COMPRESS_MODE_LZ4 + lz4_worst_compress(LZO_UNC_SIZE) + #else + lzo1x_worst_compress(LZO_UNC_SIZE) + #endif + )) { + printk(KERN_ERR + "PM: AAA: Invalid LZO compressed length\n"); +// printk("worst_compress %lu", lz4_worst_compress(LZO_UNC_SIZE)); + printk("buffer_stat = %d\n", buffer_stat); + ret = -1; + goto out_finish; + } + need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, + PAGE_SIZE); + if (need > have) { + if (eof > 1) { + ret = -1; + goto out_finish; + } + break; + } + + for (off = 0; /* copy data from page[pg] to lzo thread buffer*/ + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(data[thr].cmp + off, + page[pg], PAGE_SIZE); + have--; /*one page is comsumed by lzo decompress thread.*/ + if (++pg >= ring_size) /* ring buffer is empty*/ + pg = 0; + } + + atomic_set(&data[thr].ready, 1); /* set ready bit*/ + wake_up(&data[thr].go); /* wake up corresponding lzo thread*/ + } + + /*get the next cmp data length*/ + next_cmp_len = DIV_ROUND_UP(*(size_t *) page[pg] + LZO_HEADER, + PAGE_SIZE); + + if(buffer_stat == ANOTHER_FULL){ + + if( unlikely(((ring_size/2-pg) < next_cmp_len) + || ((ring_size-pg) < next_cmp_len) ) + ){ + /* printk(KERN_CRIT"!!! pg = %u, ring_size/2 = %u,"\ + " skip sending request.\n", pg, ring_size/2); */ + goto skip_req; + } + + /* all data in previous buffer was loaded into lzo thread for decompression, + before it finish we have some time to send IO request to load the previous buffer. + */ + for (i = 0; !eof && i < want; i++) { + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) {/* has unloaded pages.*/ + goto out_finish; + } else {/*EOF*/ + eof = 1; /*still need to wait on io later.*/ + break; + } + } + /*ret = 0*/ + if (++ring >= ring_size) /*ring buffer reach end*/ + ring = 0; + } + asked += i; + } + +skip_req: + for (run_threads = thr, thr = 0; thr < run_threads; thr++) {/*wait for every running thread finish*/ + + + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); + + ret = data[thr].ret; + + if (ret < 0) { + printk(KERN_ERR + "PM: LZO decompression failed\n"); + goto out_finish; + } + + if (unlikely(!data[thr].unc_len || + data[thr].unc_len > LZO_UNC_SIZE || + data[thr].unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR + "PM: BBB: Invalid LZO uncompressed length\n"); + printk("buffer_stat = %d\n", buffer_stat); + ret = -1; + goto out_finish; + } + for (off = 0; /* copy data from lzo thread output to snapshot memory*/ + off < data[thr].unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), + data[thr].unc + off, PAGE_SIZE); + + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + + ret = snapshot_write_next(snapshot); + if (ret <= 0) { + goto out_finish; + } + } + } + if(buffer_stat == ONE_FULL){ + if (unlikely(have < next_cmp_len && asked)){ + buffer_stat = ONE_ALMOST_EMPTY; + goto one_almost_empty; + }else{ + goto loop_start; + } + }else{ // buffer_stat should be ANOTHER_FULL + if(asked || eof){ + buffer_stat = ONE_FULL; + }else{ + /*printk(KERN_CRIT "buffer_state:%u , eof=%d: havn't request IO yet.\n", + buffer_stat, eof); */ + } + goto loop_start; + } +one_almost_empty: + /* wait on io for buf 1 to finish. */ + ret = hib_wait_on_bio_chain(&bio); + + + if (ret) + goto out_finish; + have += asked; /* all 'asked' data become 'have' data , and reset 'asked' to 0 */ + asked = 0; + if (eof) + eof = 2; + + buffer_stat = ANOTHER_FULL; + +} + +out_finish: + do_gettimeofday(&stop); + + if (!ret) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + ret = -ENODATA; + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); + +out_clean: + for (i = 0; i < ring_size; i++) + free_page((unsigned long)page[i]); + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) vfree(page); + + return ret; + +} + +/** + * swsusp_read - read the hibernation image. + * @flags_p: flags passed by the "frozen" kernel in the image header should + * be written into this memory location + */ + +int swsusp_read(unsigned int *flags_p) +{ + int error; + struct swap_map_handle handle; + struct snapshot_handle snapshot; + struct swsusp_info *header; + + memset(&snapshot, 0, sizeof(struct snapshot_handle)); + error = snapshot_write_next(&snapshot); + if (error < PAGE_SIZE) + return error < 0 ? error : -EFAULT; + header = (struct swsusp_info *)data_of(snapshot); + error = get_swap_reader(&handle, flags_p); + if (error) + goto end; + if (!error) + error = swap_read_page(&handle, header, NULL); + if (!error) { + error = (*flags_p & SF_NOCOMPRESS_MODE) ? + load_image(&handle, &snapshot, header->pages - 1) : + load_image_compress_dualbuf(&handle, &snapshot, + header->pages - 1); + } + swap_reader_finish(&handle); +end: + if (!error) + pr_debug("PM: Image successfully loaded\n"); + else + pr_debug("PM: Error %d resuming\n", error); + return error; +} + +/** + * swsusp_check - Check for swsusp signature in the resume device + */ + +int swsusp_check(void) +{ + int error; + + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, + FMODE_READ, NULL); + if (!IS_ERR(hib_resume_bdev)) { + set_blocksize(hib_resume_bdev, PAGE_SIZE); + clear_page(swsusp_header); + error = hib_bio_read_page(swsusp_resume_block, + swsusp_header, NULL); + if (error) + goto put; + + if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); + /* Reset swap signature now */ + error = hib_bio_write_page(swsusp_resume_block, + swsusp_header, NULL); + } else { + error = -EINVAL; + } + +put: + if (error) + blkdev_put(hib_resume_bdev, FMODE_READ); + else + pr_debug("PM: Image signature found, resuming\n"); + } else { + error = PTR_ERR(hib_resume_bdev); + } + + if (error) + pr_debug("PM: Image not found (code %d)\n", error); + + return error; +} + +/** + * swsusp_close - close swap device. + */ + +void swsusp_close(fmode_t mode) +{ + if (IS_ERR(hib_resume_bdev)) { + pr_debug("PM: Image device not initialised\n"); + return; + } + + blkdev_put(hib_resume_bdev, mode); +} + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); diff --git a/kernel/power/user.c b/kernel/power/user.c new file mode 100644 index 00000000..91b0fd02 --- /dev/null +++ b/kernel/power/user.c @@ -0,0 +1,464 @@ +/* + * linux/kernel/power/user.c + * + * This file provides the user space interface for software suspend/resume. + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pm.h> +#include <linux/fs.h> +#include <linux/compat.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <scsi/scsi_scan.h> + +#include <asm/uaccess.h> + +#include "power.h" + + +#define SNAPSHOT_MINOR 231 + +static struct snapshot_data { + struct snapshot_handle handle; + int swap; + int mode; + char frozen; + char ready; + char platform_support; +} snapshot_state; + +atomic_t snapshot_device_available = ATOMIC_INIT(1); + +static int snapshot_open(struct inode *inode, struct file *filp) +{ + struct snapshot_data *data; + int error; + + lock_system_sleep(); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&snapshot_device_available); + error = -ENOSYS; + goto Unlock; + } + if(create_basic_memory_bitmaps()) { + atomic_inc(&snapshot_device_available); + error = -ENOMEM; + goto Unlock; + } + nonseekable_open(inode, filp); + data = &snapshot_state; + filp->private_data = data; + memset(&data->handle, 0, sizeof(struct snapshot_handle)); + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + /* Hibernating. The image device should be accessible. */ + data->swap = swsusp_resume_device ? + swap_type_of(swsusp_resume_device, 0, NULL) : -1; + data->mode = O_RDONLY; + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_HIBERNATION); + } else { + /* + * Resuming. We may need to wait for the image device to + * appear. + */ + wait_for_device_probe(); + scsi_complete_async_scans(); + + data->swap = -1; + data->mode = O_WRONLY; + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_RESTORE); + } + if (error) { + free_basic_memory_bitmaps(); + atomic_inc(&snapshot_device_available); + } + data->frozen = 0; + data->ready = 0; + data->platform_support = 0; + + Unlock: + unlock_system_sleep(); + + return error; +} + +static int snapshot_release(struct inode *inode, struct file *filp) +{ + struct snapshot_data *data; + + lock_system_sleep(); + + swsusp_free(); + free_basic_memory_bitmaps(); + data = filp->private_data; + free_all_swap_pages(data->swap); + if (data->frozen) { + pm_restore_gfp_mask(); + thaw_processes(); + } + pm_notifier_call_chain(data->mode == O_RDONLY ? + PM_POST_HIBERNATION : PM_POST_RESTORE); + atomic_inc(&snapshot_device_available); + + unlock_system_sleep(); + + return 0; +} + +static ssize_t snapshot_read(struct file *filp, char __user *buf, + size_t count, loff_t *offp) +{ + struct snapshot_data *data; + ssize_t res; + loff_t pg_offp = *offp & ~PAGE_MASK; + + lock_system_sleep(); + + data = filp->private_data; + if (!data->ready) { + res = -ENODATA; + goto Unlock; + } + if (!pg_offp) { /* on page boundary? */ + res = snapshot_read_next(&data->handle); + if (res <= 0) + goto Unlock; + } else { + res = PAGE_SIZE - pg_offp; + } + + res = simple_read_from_buffer(buf, count, &pg_offp, + data_of(data->handle), res); + if (res > 0) + *offp += res; + + Unlock: + unlock_system_sleep(); + + return res; +} + +static ssize_t snapshot_write(struct file *filp, const char __user *buf, + size_t count, loff_t *offp) +{ + struct snapshot_data *data; + ssize_t res; + loff_t pg_offp = *offp & ~PAGE_MASK; + + lock_system_sleep(); + + data = filp->private_data; + + if (!pg_offp) { + res = snapshot_write_next(&data->handle); + if (res <= 0) + goto unlock; + } else { + res = PAGE_SIZE - pg_offp; + } + + res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp, + buf, count); + if (res > 0) + *offp += res; +unlock: + unlock_system_sleep(); + + return res; +} + +static long snapshot_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int error = 0; + struct snapshot_data *data; + loff_t size; + sector_t offset; + + if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) + return -ENOTTY; + if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR) + return -ENOTTY; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + data = filp->private_data; + + switch (cmd) { + + case SNAPSHOT_FREEZE: + if (data->frozen) + break; + + printk("Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + error = freeze_processes(); + if (!error) + data->frozen = 1; + break; + + case SNAPSHOT_UNFREEZE: + if (!data->frozen || data->ready) + break; + pm_restore_gfp_mask(); + thaw_processes(); + data->frozen = 0; + break; + + case SNAPSHOT_CREATE_IMAGE: + if (data->mode != O_RDONLY || !data->frozen || data->ready) { + error = -EPERM; + break; + } + pm_restore_gfp_mask(); + error = hibernation_snapshot(data->platform_support); + if (!error) { + error = put_user(in_suspend, (int __user *)arg); + data->ready = !freezer_test_done && !error; + freezer_test_done = false; + } + break; + + case SNAPSHOT_ATOMIC_RESTORE: + snapshot_write_finalize(&data->handle); + if (data->mode != O_WRONLY || !data->frozen || + !snapshot_image_loaded(&data->handle)) { + error = -EPERM; + break; + } + error = hibernation_restore(data->platform_support); + break; + + case SNAPSHOT_FREE: + swsusp_free(); + memset(&data->handle, 0, sizeof(struct snapshot_handle)); + data->ready = 0; + /* + * It is necessary to thaw kernel threads here, because + * SNAPSHOT_CREATE_IMAGE may be invoked directly after + * SNAPSHOT_FREE. In that case, if kernel threads were not + * thawed, the preallocation of memory carried out by + * hibernation_snapshot() might run into problems (i.e. it + * might fail or even deadlock). + */ + thaw_kernel_threads(); + break; + + case SNAPSHOT_PREF_IMAGE_SIZE: + image_size = arg; + break; + + case SNAPSHOT_GET_IMAGE_SIZE: + if (!data->ready) { + error = -ENODATA; + break; + } + size = snapshot_get_image_size(); + size <<= PAGE_SHIFT; + error = put_user(size, (loff_t __user *)arg); + break; + + case SNAPSHOT_AVAIL_SWAP_SIZE: + size = count_swap_pages(data->swap, 1); + size <<= PAGE_SHIFT; + error = put_user(size, (loff_t __user *)arg); + break; + + case SNAPSHOT_ALLOC_SWAP_PAGE: + if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { + error = -ENODEV; + break; + } + offset = alloc_swapdev_block(data->swap); + if (offset) { + offset <<= PAGE_SHIFT; + error = put_user(offset, (loff_t __user *)arg); + } else { + error = -ENOSPC; + } + break; + + case SNAPSHOT_FREE_SWAP_PAGES: + if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { + error = -ENODEV; + break; + } + free_all_swap_pages(data->swap); + break; + + case SNAPSHOT_S2RAM: + if (!data->frozen) { + error = -EPERM; + break; + } + /* + * Tasks are frozen and the notifiers have been called with + * PM_HIBERNATION_PREPARE + */ + error = suspend_devices_and_enter(PM_SUSPEND_MEM); + data->ready = 0; + break; + + case SNAPSHOT_PLATFORM_SUPPORT: + data->platform_support = !!arg; + break; + + case SNAPSHOT_POWER_OFF: + if (data->platform_support) + error = hibernation_platform_enter(); + break; + + case SNAPSHOT_SET_SWAP_AREA: + if (swsusp_swap_in_use()) { + error = -EPERM; + } else { + struct resume_swap_area swap_area; + dev_t swdev; + + error = copy_from_user(&swap_area, (void __user *)arg, + sizeof(struct resume_swap_area)); + if (error) { + error = -EFAULT; + break; + } + + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + swdev = new_decode_dev(swap_area.dev); + if (swdev) { + offset = swap_area.offset; + data->swap = swap_type_of(swdev, offset, NULL); + if (data->swap < 0) + error = -ENODEV; + } else { + data->swap = -1; + error = -EINVAL; + } + } + break; + + default: + error = -ENOTTY; + + } + + mutex_unlock(&pm_mutex); + + return error; +} + +#ifdef CONFIG_COMPAT + +struct compat_resume_swap_area { + compat_loff_t offset; + u32 dev; +} __packed; + +static long +snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + BUILD_BUG_ON(sizeof(loff_t) != sizeof(compat_loff_t)); + + switch (cmd) { + case SNAPSHOT_GET_IMAGE_SIZE: + case SNAPSHOT_AVAIL_SWAP_SIZE: + case SNAPSHOT_ALLOC_SWAP_PAGE: { + compat_loff_t __user *uoffset = compat_ptr(arg); + loff_t offset; + mm_segment_t old_fs; + int err; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = snapshot_ioctl(file, cmd, (unsigned long) &offset); + set_fs(old_fs); + if (!err && put_user(offset, uoffset)) + err = -EFAULT; + return err; + } + + case SNAPSHOT_CREATE_IMAGE: + return snapshot_ioctl(file, cmd, + (unsigned long) compat_ptr(arg)); + + case SNAPSHOT_SET_SWAP_AREA: { + struct compat_resume_swap_area __user *u_swap_area = + compat_ptr(arg); + struct resume_swap_area swap_area; + mm_segment_t old_fs; + int err; + + err = get_user(swap_area.offset, &u_swap_area->offset); + err |= get_user(swap_area.dev, &u_swap_area->dev); + if (err) + return -EFAULT; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA, + (unsigned long) &swap_area); + set_fs(old_fs); + return err; + } + + default: + return snapshot_ioctl(file, cmd, arg); + } +} + +#endif /* CONFIG_COMPAT */ + +static const struct file_operations snapshot_fops = { + .open = snapshot_open, + .release = snapshot_release, + .read = snapshot_read, + .write = snapshot_write, + .llseek = no_llseek, + .unlocked_ioctl = snapshot_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = snapshot_compat_ioctl, +#endif +}; + +static struct miscdevice snapshot_device = { + .minor = SNAPSHOT_MINOR, + .name = "snapshot", + .fops = &snapshot_fops, +}; + +static int __init snapshot_device_init(void) +{ + return misc_register(&snapshot_device); +}; + +device_initcall(snapshot_device_init); diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 00000000..c8fba338 --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,259 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. + * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/hrtimer.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC + struct list_head lru; +#endif +}; + +static struct rb_root wakelocks_tree = RB_ROOT; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock *wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + else + return wl; + } + if (diff < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + if (!add_if_not_found) + return ERR_PTR(-EINVAL); + + if (wakelocks_limit_exceeded()) + return ERR_PTR(-ENOSPC); + + /* Not found, we have to add a new one. */ + wl = kzalloc(sizeof(*wl), GFP_KERNEL); + if (!wl) + return ERR_PTR(-ENOMEM); + + wl->name = kstrndup(name, len, GFP_KERNEL); + if (!wl->name) { + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws.name = wl->name; + wakeup_source_add(&wl->ws); + rb_link_node(&wl->node, parent, node); + rb_insert_color(&wl->node, &wakelocks_tree); + wakelocks_lru_add(wl); + increment_wakelocks_number(); + return wl; +} + +int pm_wake_lock(const char *buf) +{ + const char *str = buf; + struct wakelock *wl; + u64 timeout_ns = 0; + size_t len; + int ret = 0; + + while (*str && !isspace(*str)) + str++; + + len = str - buf; + if (!len) + return -EINVAL; + + if (*str && *str != '\n') { + /* Find out if there's a valid timeout string appended. */ + ret = kstrtou64(skip_spaces(str), 10, &timeout_ns); + if (ret) + return -EINVAL; + } + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, true); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + if (timeout_ns) { + u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; + + do_div(timeout_ms, NSEC_PER_MSEC); + __pm_wakeup_event(&wl->ws, timeout_ms); + } else { + __pm_stay_awake(&wl->ws); + } + + wakelocks_lru_most_recent(wl); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} + +int pm_wake_unlock(const char *buf) +{ + struct wakelock *wl; + size_t len; + int ret = 0; + + len = strlen(buf); + if (!len) + return -EINVAL; + + if (buf[len-1] == '\n') + len--; + + if (!len) + return -EINVAL; + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, false); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + __pm_relax(&wl->ws); + + wakelocks_lru_most_recent(wl); + wakelocks_gc(); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} |