From cb14df71f8a2da98fd242a35143613c55525ca44 Mon Sep 17 00:00:00 2001 From: kitakar5525 <34676735+kitakar5525@users.noreply.github.com> Date: Wed, 31 Jul 2019 08:41:30 +0900 Subject: [PATCH 02/12] suspend Note: NVMe part will be merged into Linux 5.3. Remove the part in this patch when it arrives. --- drivers/nvme/host/core.c | 24 ++++++++-- drivers/nvme/host/nvme.h | 6 +++ drivers/nvme/host/pci.c | 95 ++++++++++++++++++++++++++++++++++++++-- kernel/power/suspend.c | 11 +++++ kernel/sysctl.c | 9 ++++ 5 files changed, 139 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 963b4c6309b9..4b8cf243c150 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1114,15 +1114,15 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, return id; } -static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - void *buffer, size_t buflen, u32 *result) +static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; union nvme_result res; int ret; memset(&c, 0, sizeof(c)); - c.features.opcode = nvme_admin_set_features; + c.features.opcode = op; c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); @@ -1133,6 +1133,24 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword return ret; } +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result) +{ + return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer, + buflen, result); +} +EXPORT_SYMBOL_GPL(nvme_set_features); + +int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result) +{ + return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer, + buflen, result); +} +EXPORT_SYMBOL_GPL(nvme_get_features); + int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) { u32 q_count = (*count - 1) | ((*count - 1) << 16); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 81215ca32671..9285d5f6437b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -459,6 +459,12 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, blk_mq_req_flags_t flags, bool poll); +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result); +int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 09ffd21d1809..3e22d5f14e93 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -116,6 +117,7 @@ struct nvme_dev { u32 cmbsz; u32 cmbloc; struct nvme_ctrl ctrl; + u32 last_ps; mempool_t *iod_mempool; @@ -2849,16 +2851,94 @@ static void nvme_remove(struct pci_dev *pdev) } #ifdef CONFIG_PM_SLEEP +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps) +{ + return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps); +} + +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps) +{ + return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL); +} + +static int nvme_resume(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); + struct nvme_ctrl *ctrl = &ndev->ctrl; + + if (pm_resume_via_firmware() || !ctrl->npss || + nvme_set_power_state(ctrl, ndev->last_ps) != 0) + nvme_reset_ctrl(ctrl); + return 0; +} + static int nvme_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); + struct nvme_ctrl *ctrl = &ndev->ctrl; + int ret = -EBUSY; + + /* + * The platform does not remove power for a kernel managed suspend so + * use host managed nvme power settings for lowest idle power if + * possible. This should have quicker resume latency than a full device + * shutdown. But if the firmware is involved after the suspend or the + * device does not support any non-default power states, shut down the + * device fully. + */ + if (pm_suspend_via_firmware() || !ctrl->npss) { + nvme_dev_disable(ndev, true); + return 0; + } + + nvme_start_freeze(ctrl); + nvme_wait_freeze(ctrl); + nvme_sync_queues(ctrl); + + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) + goto unfreeze; + + ndev->last_ps = 0; + ret = nvme_get_power_state(ctrl, &ndev->last_ps); + if (ret < 0) + goto unfreeze; + + ret = nvme_set_power_state(ctrl, ctrl->npss); + if (ret < 0) + goto unfreeze; + + if (ret) { + /* + * Clearing npss forces a controller reset on resume. The + * correct value will be resdicovered then. + */ + nvme_dev_disable(ndev, true); + ctrl->npss = 0; + ret = 0; + goto unfreeze; + } + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); +unfreeze: + nvme_unfreeze(ctrl); + return ret; +} + +static int nvme_simple_suspend(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); nvme_dev_disable(ndev, true); return 0; } -static int nvme_resume(struct device *dev) +static int nvme_simple_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); @@ -2866,9 +2946,16 @@ static int nvme_resume(struct device *dev) nvme_reset_ctrl(&ndev->ctrl); return 0; } -#endif -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); +const struct dev_pm_ops nvme_dev_pm_ops = { + .suspend = nvme_suspend, + .resume = nvme_resume, + .freeze = nvme_simple_suspend, + .thaw = nvme_simple_resume, + .poweroff = nvme_simple_suspend, + .restore = nvme_simple_resume, +}; +#endif /* CONFIG_PM_SLEEP */ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -2975,9 +3062,11 @@ static struct pci_driver nvme_driver = { .probe = nvme_probe, .remove = nvme_remove, .shutdown = nvme_shutdown, +#ifdef CONFIG_PM_SLEEP .driver = { .pm = &nvme_dev_pm_ops, }, +#endif .sriov_configure = pci_sriov_configure_simple, .err_handler = &nvme_err_handler, }; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 096211299c07..0cb0fe170977 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -533,6 +533,8 @@ int suspend_devices_and_enter(suspend_state_t state) goto Resume_devices; } +unsigned int resume_delay = 3000; + /** * suspend_finish - Clean up before finishing the suspend sequence. * @@ -541,6 +543,15 @@ int suspend_devices_and_enter(suspend_state_t state) */ static void suspend_finish(void) { + if (resume_delay) { + /* Give kernel threads a head start, such that usb-storage + * can detect devices before syslog attempts to write log + * messages from the suspend code. + */ + thaw_kernel_threads(); + pr_debug("PM: Sleeping for %d milliseconds.\n", resume_delay); + msleep(resume_delay); + } suspend_thaw_processes(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1beca96fb625..4b98db9bbc88 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -318,7 +318,16 @@ static int min_extfrag_threshold; static int max_extfrag_threshold = 1000; #endif +extern unsigned int resume_delay; + static struct ctl_table kern_table[] = { + { + .procname = "resume_delay", + .data = &resume_delay, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, -- 2.23.0