From 7b642c77811dc3887756f5abac5a9710564b098e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 11:11:12 -0800
Subject: [PATCH 01/10] hle: kernel: multicore: Replace n-JITs impl. with 4
 JITs.

---
 src/core/arm/arm_interface.h              |  3 ++
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  4 +++
 src/core/arm/dynarmic/arm_dynarmic_32.h   |  1 +
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  4 +++
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  1 +
 src/core/core.cpp                         | 13 +++-----
 src/core/cpu_manager.cpp                  | 16 +++++-----
 src/core/hle/kernel/kernel.cpp            | 23 ++++++++++----
 src/core/hle/kernel/kernel.h              |  3 ++
 src/core/hle/kernel/physical_core.cpp     | 38 ++++++++++++++++++-----
 src/core/hle/kernel/physical_core.h       | 29 ++++++++++++++---
 src/core/hle/kernel/scheduler.cpp         | 20 +++++++-----
 src/core/hle/kernel/svc.cpp               |  9 ++++++
 src/core/hle/kernel/thread.cpp            | 27 ++--------------
 src/core/hle/kernel/thread.h              |  5 ---
 15 files changed, 124 insertions(+), 72 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 1f24051e4..b3d8ceaf8 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -64,6 +64,9 @@ public:
     /// Step CPU by one instruction
     virtual void Step() = 0;
 
+    /// Exits execution from a callback, the callback must rewind the stack
+    virtual void ExceptionalExit() = 0;
+
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 6dc03f3b1..af23206f5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -189,6 +189,10 @@ void ARM_Dynarmic_32::Run() {
     jit->Run();
 }
 
+void ARM_Dynarmic_32::ExceptionalExit() {
+    jit->ExceptionalExit();
+}
+
 void ARM_Dynarmic_32::Step() {
     jit->Step();
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 2bab31b92..e16b689c8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -42,6 +42,7 @@ public:
     u32 GetPSTATE() const override;
     void SetPSTATE(u32 pstate) override;
     void Run() override;
+    void ExceptionalExit() override;
     void Step() override;
     VAddr GetTlsAddress() const override;
     void SetTlsAddress(VAddr address) override;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 5c2060d78..1c9fd18b5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -220,6 +220,10 @@ void ARM_Dynarmic_64::Run() {
     jit->Run();
 }
 
+void ARM_Dynarmic_64::ExceptionalExit() {
+    jit->ExceptionalExit();
+}
+
 void ARM_Dynarmic_64::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 28e11a17d..aa0a5c424 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -40,6 +40,7 @@ public:
     void SetPSTATE(u32 pstate) override;
     void Run() override;
     void Step() override;
+    void ExceptionalExit() override;
     VAddr GetTlsAddress() const override;
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 76a38ea2a..58368fe3c 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -245,6 +245,7 @@ struct System::Impl {
         }
         AddGlueRegistrationForProcess(*app_loader, *main_process);
         kernel.MakeCurrentProcess(main_process.get());
+        kernel.InitializeCores();
 
         // Initialize cheat engine
         if (cheat_engine) {
@@ -490,11 +491,11 @@ const TelemetrySession& System::TelemetrySession() const {
 }
 
 ARM_Interface& System::CurrentArmInterface() {
-    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
+    return impl->kernel.CurrentPhysicalCore().ArmInterface();
 }
 
 const ARM_Interface& System::CurrentArmInterface() const {
-    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
+    return impl->kernel.CurrentPhysicalCore().ArmInterface();
 }
 
 std::size_t System::CurrentCoreIndex() const {
@@ -554,15 +555,11 @@ const Kernel::Process* System::CurrentProcess() const {
 }
 
 ARM_Interface& System::ArmInterface(std::size_t core_index) {
-    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
-    ASSERT(thread && !thread->IsHLEThread());
-    return thread->ArmInterface();
+    return impl->kernel.PhysicalCore(core_index).ArmInterface();
 }
 
 const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
-    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
-    ASSERT(thread && !thread->IsHLEThread());
-    return thread->ArmInterface();
+    return impl->kernel.PhysicalCore(core_index).ArmInterface();
 }
 
 ExclusiveMonitor& System::Monitor() {
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index eeeb6e8df..0cff985e9 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -113,22 +113,23 @@ void CpuManager::MultiCoreRunGuestThread() {
         auto& sched = kernel.CurrentScheduler();
         sched.OnThreadStart();
     }
+    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    auto& host_context = thread->GetHostContext();
+    host_context->SetRewindPoint(GuestRewindFunction, this);
     MultiCoreRunGuestLoop();
 }
 
 void CpuManager::MultiCoreRunGuestLoop() {
     auto& kernel = system.Kernel();
-    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+
     while (true) {
         auto* physical_core = &kernel.CurrentPhysicalCore();
-        auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
         while (!physical_core->IsInterrupted()) {
-            arm_interface.Run();
+            physical_core->Run();
             physical_core = &kernel.CurrentPhysicalCore();
         }
         system.ExitDynarmicProfile();
-        arm_interface.ClearExclusiveState();
         auto& scheduler = kernel.CurrentScheduler();
         scheduler.TryDoContextSwitch();
     }
@@ -209,6 +210,9 @@ void CpuManager::SingleCoreRunGuestThread() {
         auto& sched = kernel.CurrentScheduler();
         sched.OnThreadStart();
     }
+    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    auto& host_context = thread->GetHostContext();
+    host_context->SetRewindPoint(GuestRewindFunction, this);
     SingleCoreRunGuestLoop();
 }
 
@@ -217,17 +221,15 @@ void CpuManager::SingleCoreRunGuestLoop() {
     auto* thread = kernel.CurrentScheduler().GetCurrentThread();
     while (true) {
         auto* physical_core = &kernel.CurrentPhysicalCore();
-        auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
         if (!physical_core->IsInterrupted()) {
-            arm_interface.Run();
+            physical_core->Run();
             physical_core = &kernel.CurrentPhysicalCore();
         }
         system.ExitDynarmicProfile();
         thread->SetPhantomMode(true);
         system.CoreTiming().Advance();
         thread->SetPhantomMode(false);
-        arm_interface.ClearExclusiveState();
         PreemptSingleCore();
         auto& scheduler = kernel.Scheduler(current_core);
         scheduler.TryDoContextSwitch();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index bb3e312a7..4cf9cee42 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -68,6 +68,12 @@ struct KernelCore::Impl {
         InitializeSuspendThreads();
     }
 
+    void InitializeCores() {
+        for (auto& core : cores) {
+            core.Initialize(current_process->Is64BitProcess());
+        }
+    }
+
     void Shutdown() {
         next_object_id = 0;
         next_kernel_process_id = Process::InitialKIPIDMin;
@@ -116,7 +122,7 @@ struct KernelCore::Impl {
             Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i);
-            cores.emplace_back(system, i, *schedulers[i], interrupts[i]);
+            cores.emplace_back(i, system, *schedulers[i], interrupts);
         }
     }
 
@@ -181,6 +187,7 @@ struct KernelCore::Impl {
         if (process == nullptr) {
             return;
         }
+
         const u32 core_id = GetCurrentHostThreadID();
         if (core_id < Core::Hardware::NUM_CPU_CORES) {
             system.Memory().SetCurrentPageTable(*process, core_id);
@@ -372,6 +379,10 @@ void KernelCore::Initialize() {
     impl->Initialize(*this);
 }
 
+void KernelCore::InitializeCores() {
+    impl->InitializeCores();
+}
+
 void KernelCore::Shutdown() {
     impl->Shutdown();
 }
@@ -486,12 +497,12 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
 }
 
 void KernelCore::InvalidateAllInstructionCaches() {
-    auto& threads = GlobalScheduler().GetThreadList();
-    for (auto& thread : threads) {
-        if (!thread->IsHLEThread()) {
-            auto& arm_interface = thread->ArmInterface();
-            arm_interface.ClearInstructionCache();
+    if (!IsMulticore()) {
+        for (auto& physical_core : impl->cores) {
+            physical_core.ArmInterface().ClearInstructionCache();
         }
+    } else {
+        ASSERT_MSG(false, "UNIMPLEMENTED!!!!!!!!!!!");
     }
 }
 
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 16285c3f0..a9fdc5860 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -74,6 +74,9 @@ public:
     /// Resets the kernel to a clean slate for use.
     void Initialize();
 
+    /// Initializes the CPU cores.
+    void InitializeCores();
+
     /// Clears all resources in use by the kernel instance.
     void Shutdown();
 
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 6e04d025f..50aca5752 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -4,21 +4,43 @@
 
 #include "common/spin_lock.h"
 #include "core/arm/cpu_interrupt_handler.h"
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/core.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/scheduler.h"
 
 namespace Kernel {
 
-PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
-                           Core::CPUInterruptHandler& interrupt_handler)
-    : interrupt_handler{interrupt_handler},
-      core_index{id}, scheduler{scheduler}, guard{std::make_unique<Common::SpinLock>()} {}
+PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system,
+                           Kernel::Scheduler& scheduler, Core::CPUInterrupts& interrupts)
+    : core_index{core_index}, system{system}, scheduler{scheduler},
+      interrupts{interrupts}, guard{std::make_unique<Common::SpinLock>()} {}
 
 PhysicalCore::~PhysicalCore() = default;
 
+void PhysicalCore::Initialize([[maybe_unused]] bool is_64_bit) {
+#ifdef ARCHITECTURE_x86_64
+    auto& kernel = system.Kernel();
+    if (is_64_bit) {
+        arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
+            system, interrupts, kernel.IsMulticore(), kernel.GetExclusiveMonitor(), core_index);
+    } else {
+        arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
+            system, interrupts, kernel.IsMulticore(), kernel.GetExclusiveMonitor(), core_index);
+    }
+#else
+#error Platform not supported yet.
+#endif
+}
+
+void PhysicalCore::Run() {
+    arm_interface->Run();
+}
+
 void PhysicalCore::Idle() {
-    interrupt_handler.AwaitInterrupt();
+    interrupts[core_index].AwaitInterrupt();
 }
 
 void PhysicalCore::Shutdown() {
@@ -26,18 +48,18 @@ void PhysicalCore::Shutdown() {
 }
 
 bool PhysicalCore::IsInterrupted() const {
-    return interrupt_handler.IsInterrupted();
+    return interrupts[core_index].IsInterrupted();
 }
 
 void PhysicalCore::Interrupt() {
     guard->lock();
-    interrupt_handler.SetInterrupt(true);
+    interrupts[core_index].SetInterrupt(true);
     guard->unlock();
 }
 
 void PhysicalCore::ClearInterrupt() {
     guard->lock();
-    interrupt_handler.SetInterrupt(false);
+    interrupts[core_index].SetInterrupt(false);
     guard->unlock();
 }
 
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index d7a7a951c..ace058a5a 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -4,9 +4,12 @@
 
 #pragma once
 
+#include <array>
 #include <cstddef>
 #include <memory>
 
+#include "core/arm/arm_interface.h"
+
 namespace Common {
 class SpinLock;
 }
@@ -16,7 +19,6 @@ class Scheduler;
 } // namespace Kernel
 
 namespace Core {
-class ARM_Interface;
 class CPUInterruptHandler;
 class ExclusiveMonitor;
 class System;
@@ -26,8 +28,8 @@ namespace Kernel {
 
 class PhysicalCore {
 public:
-    PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
-                 Core::CPUInterruptHandler& interrupt_handler);
+    PhysicalCore(std::size_t core_index, Core::System& system, Kernel::Scheduler& scheduler,
+                 Core::CPUInterrupts& interrupts);
     ~PhysicalCore();
 
     PhysicalCore(const PhysicalCore&) = delete;
@@ -36,7 +38,14 @@ public:
     PhysicalCore(PhysicalCore&&) = default;
     PhysicalCore& operator=(PhysicalCore&&) = default;
 
+    /// Initialize the core for the specified parameters.
+    void Initialize(bool is_64_bit);
+
+    /// Execute current jit state
+    void Run();
+
     void Idle();
+
     /// Interrupt this physical core.
     void Interrupt();
 
@@ -49,6 +58,14 @@ public:
     // Shutdown this physical core.
     void Shutdown();
 
+    Core::ARM_Interface& ArmInterface() {
+        return *arm_interface;
+    }
+
+    const Core::ARM_Interface& ArmInterface() const {
+        return *arm_interface;
+    }
+
     bool IsMainCore() const {
         return core_index == 0;
     }
@@ -70,10 +87,12 @@ public:
     }
 
 private:
-    Core::CPUInterruptHandler& interrupt_handler;
-    std::size_t core_index;
+    const std::size_t core_index;
+    Core::System& system;
     Kernel::Scheduler& scheduler;
+    Core::CPUInterrupts& interrupts;
     std::unique_ptr<Common::SpinLock> guard;
+    std::unique_ptr<Core::ARM_Interface> arm_interface;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 6b7db5372..0805e9914 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -621,11 +621,14 @@ void Scheduler::OnThreadStart() {
 void Scheduler::Unload() {
     Thread* thread = current_thread.get();
     if (thread) {
-        thread->SetContinuousOnSVC(false);
         thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         thread->SetIsRunning(false);
+        if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) {
+            system.ArmInterface(core_id).ExceptionalExit();
+            thread->SetContinuousOnSVC(false);
+        }
         if (!thread->IsHLEThread() && !thread->HasExited()) {
-            Core::ARM_Interface& cpu_core = thread->ArmInterface();
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
             cpu_core.SaveContext(thread->GetContext32());
             cpu_core.SaveContext(thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
@@ -652,12 +655,11 @@ void Scheduler::Reload() {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!thread->IsHLEThread()) {
-            Core::ARM_Interface& cpu_core = thread->ArmInterface();
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
             cpu_core.LoadContext(thread->GetContext32());
             cpu_core.LoadContext(thread->GetContext64());
             cpu_core.SetTlsAddress(thread->GetTLSAddress());
             cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
-            cpu_core.ChangeProcessorID(this->core_id);
             cpu_core.ClearExclusiveState();
         }
     }
@@ -679,12 +681,11 @@ void Scheduler::SwitchContextStep2() {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!selected_thread->IsHLEThread()) {
-            Core::ARM_Interface& cpu_core = selected_thread->ArmInterface();
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
             cpu_core.LoadContext(selected_thread->GetContext32());
             cpu_core.LoadContext(selected_thread->GetContext64());
             cpu_core.SetTlsAddress(selected_thread->GetTLSAddress());
             cpu_core.SetTPIDR_EL0(selected_thread->GetTPIDR_EL0());
-            cpu_core.ChangeProcessorID(this->core_id);
             cpu_core.ClearExclusiveState();
         }
     }
@@ -715,11 +716,14 @@ void Scheduler::SwitchContext() {
         if (new_thread != nullptr && new_thread->IsSuspendThread()) {
             previous_thread->SetWasRunning(true);
         }
-        previous_thread->SetContinuousOnSVC(false);
         previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         previous_thread->SetIsRunning(false);
+        if (previous_thread->IsContinuousOnSVC() && !previous_thread->IsHLEThread()) {
+            system.ArmInterface(core_id).ExceptionalExit();
+            previous_thread->SetContinuousOnSVC(false);
+        }
         if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) {
-            Core::ARM_Interface& cpu_core = previous_thread->ArmInterface();
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
             cpu_core.SaveContext(previous_thread->GetContext32());
             cpu_core.SaveContext(previous_thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index e3b770d66..95d6e2b4d 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2639,6 +2639,9 @@ void Call(Core::System& system, u32 immediate) {
     auto& kernel = system.Kernel();
     kernel.EnterSVCProfile();
 
+    auto* thread = system.CurrentScheduler().GetCurrentThread();
+    thread->SetContinuousOnSVC(true);
+
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
     if (info) {
@@ -2652,6 +2655,12 @@ void Call(Core::System& system, u32 immediate) {
     }
 
     kernel.ExitSVCProfile();
+
+    if (!thread->IsContinuousOnSVC()) {
+        auto* host_context = thread->GetHostContext().get();
+        host_context->Rewind();
+    }
+
     system.EnterDynarmicProfile();
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index da0cb26b6..3abe12810 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -12,7 +12,6 @@
 #include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/thread_queue_list.h"
-#include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
@@ -62,7 +61,6 @@ void Thread::Stop() {
             // Mark the TLS slot in the thread's page as free.
             owner_process->FreeTLSRegion(tls_address);
         }
-        arm_interface.reset();
         has_exited = true;
     }
     global_handle = 0;
@@ -217,22 +215,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
         thread->tls_address = 0;
     }
 
-    thread->arm_interface.reset();
+    // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
+    // to initialize the context
     if ((type_flags & THREADTYPE_HLE) == 0) {
-#ifdef ARCHITECTURE_x86_64
-        if (owner_process && !owner_process->Is64BitProcess()) {
-            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
-                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
-                processor_id);
-        } else {
-            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
-                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
-                processor_id);
-        }
-#else
-#error Platform not supported yet.
-#endif
-
         ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
                              static_cast<u32>(entry_point), static_cast<u32>(arg));
         ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
@@ -268,14 +253,6 @@ VAddr Thread::GetCommandBufferAddress() const {
     return GetTLSAddress() + command_header_offset;
 }
 
-Core::ARM_Interface& Thread::ArmInterface() {
-    return *arm_interface;
-}
-
-const Core::ARM_Interface& Thread::ArmInterface() const {
-    return *arm_interface;
-}
-
 void Thread::SetStatus(ThreadStatus new_status) {
     if (new_status == status) {
         return;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 8daf79fac..20e86fb81 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -248,10 +248,6 @@ public:
 
     void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
 
-    Core::ARM_Interface& ArmInterface();
-
-    const Core::ARM_Interface& ArmInterface() const;
-
     SynchronizationObject* GetSignalingObject() const {
         return signaling_object;
     }
@@ -586,7 +582,6 @@ private:
     Common::SpinLock context_guard{};
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
-    std::unique_ptr<Core::ARM_Interface> arm_interface{};
     std::shared_ptr<Common::Fiber> host_context{};
 
     u64 thread_id = 0;

From c042a89113617f75e81163f103ef82d6d714cd87 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 15:17:47 -0800
Subject: [PATCH 02/10] common: fiber: Use boost::context instead of native
 fibers on Windows.

---
 src/common/fiber.cpp           | 114 +++------------------------------
 src/common/fiber.h             |   9 ---
 src/core/hle/kernel/kernel.cpp |   2 +-
 3 files changed, 9 insertions(+), 116 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 3e3029cd1..3978c8624 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -6,17 +6,16 @@
 #include "common/fiber.h"
 #include "common/spin_lock.h"
 
-#if defined(_WIN32) || defined(WIN32)
-#include <windows.h>
-#else
 #include <boost/context/detail/fcontext.hpp>
-#endif
 
 namespace Common {
 
-constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
+constexpr std::size_t default_stack_size = 256 * 1024;
 
 struct Fiber::FiberImpl {
+    alignas(64) std::array<u8, default_stack_size> stack;
+    alignas(64) std::array<u8, default_stack_size> rewind_stack;
+
     SpinLock guard{};
     std::function<void(void*)> entry_point;
     std::function<void(void*)> rewind_point;
@@ -26,17 +25,10 @@ struct Fiber::FiberImpl {
     bool is_thread_fiber{};
     bool released{};
 
-#if defined(_WIN32) || defined(WIN32)
-    LPVOID handle = nullptr;
-    LPVOID rewind_handle = nullptr;
-#else
-    alignas(64) std::array<u8, default_stack_size> stack;
-    alignas(64) std::array<u8, default_stack_size> rewind_stack;
-    u8* stack_limit;
-    u8* rewind_stack_limit;
-    boost::context::detail::fcontext_t context;
-    boost::context::detail::fcontext_t rewind_context;
-#endif
+    u8* stack_limit{};
+    u8* rewind_stack_limit{};
+    boost::context::detail::fcontext_t context{};
+    boost::context::detail::fcontext_t rewind_context{};
 };
 
 void Fiber::SetStartParameter(void* new_parameter) {
@@ -48,95 +40,6 @@ void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewin
     impl->rewind_parameter = rewind_param;
 }
 
-#if defined(_WIN32) || defined(WIN32)
-
-void Fiber::Start() {
-    ASSERT(impl->previous_fiber != nullptr);
-    impl->previous_fiber->impl->guard.unlock();
-    impl->previous_fiber.reset();
-    impl->entry_point(impl->start_parameter);
-    UNREACHABLE();
-}
-
-void Fiber::OnRewind() {
-    ASSERT(impl->handle != nullptr);
-    DeleteFiber(impl->handle);
-    impl->handle = impl->rewind_handle;
-    impl->rewind_handle = nullptr;
-    impl->rewind_point(impl->rewind_parameter);
-    UNREACHABLE();
-}
-
-void Fiber::FiberStartFunc(void* fiber_parameter) {
-    auto* fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->Start();
-}
-
-void Fiber::RewindStartFunc(void* fiber_parameter) {
-    auto* fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->OnRewind();
-}
-
-Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : impl{std::make_unique<FiberImpl>()} {
-    impl->entry_point = std::move(entry_point_func);
-    impl->start_parameter = start_parameter;
-    impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
-}
-
-Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
-
-Fiber::~Fiber() {
-    if (impl->released) {
-        return;
-    }
-    // Make sure the Fiber is not being used
-    const bool locked = impl->guard.try_lock();
-    ASSERT_MSG(locked, "Destroying a fiber that's still running");
-    if (locked) {
-        impl->guard.unlock();
-    }
-    DeleteFiber(impl->handle);
-}
-
-void Fiber::Exit() {
-    ASSERT_MSG(impl->is_thread_fiber, "Exitting non main thread fiber");
-    if (!impl->is_thread_fiber) {
-        return;
-    }
-    ConvertFiberToThread();
-    impl->guard.unlock();
-    impl->released = true;
-}
-
-void Fiber::Rewind() {
-    ASSERT(impl->rewind_point);
-    ASSERT(impl->rewind_handle == nullptr);
-    impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
-    SwitchToFiber(impl->rewind_handle);
-}
-
-void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
-    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
-    ASSERT_MSG(to != nullptr, "Next fiber is null!");
-    to->impl->guard.lock();
-    to->impl->previous_fiber = from;
-    SwitchToFiber(to->impl->handle);
-    ASSERT(from->impl->previous_fiber != nullptr);
-    from->impl->previous_fiber->impl->guard.unlock();
-    from->impl->previous_fiber.reset();
-}
-
-std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
-    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
-    fiber->impl->guard.lock();
-    fiber->impl->handle = ConvertThreadToFiber(nullptr);
-    fiber->impl->is_thread_fiber = true;
-    return fiber;
-}
-
-#else
-
 void Fiber::Start(boost::context::detail::transfer_t& transfer) {
     ASSERT(impl->previous_fiber != nullptr);
     impl->previous_fiber->impl->context = transfer.fctx;
@@ -229,5 +132,4 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
     return fiber;
 }
 
-#endif
 } // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 5323e8579..f7f587f8c 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -7,11 +7,9 @@
 #include <functional>
 #include <memory>
 
-#if !defined(_WIN32) && !defined(WIN32)
 namespace boost::context::detail {
 struct transfer_t;
 }
-#endif
 
 namespace Common {
 
@@ -59,17 +57,10 @@ public:
 private:
     Fiber();
 
-#if defined(_WIN32) || defined(WIN32)
-    void OnRewind();
-    void Start();
-    static void FiberStartFunc(void* fiber_parameter);
-    static void RewindStartFunc(void* fiber_parameter);
-#else
     void OnRewind(boost::context::detail::transfer_t& transfer);
     void Start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
     static void RewindStartFunc(boost::context::detail::transfer_t transfer);
-#endif
 
     struct FiberImpl;
     std::unique_ptr<FiberImpl> impl;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4cf9cee42..c426b6378 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -502,7 +502,7 @@ void KernelCore::InvalidateAllInstructionCaches() {
             physical_core.ArmInterface().ClearInstructionCache();
         }
     } else {
-        ASSERT_MSG(false, "UNIMPLEMENTED!!!!!!!!!!!");
+        UNIMPLEMENTED();
     }
 }
 

From 9423347c1b7adb58d4881995a15c80d41faa5c74 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 21:23:04 -0800
Subject: [PATCH 03/10] hle: kernel: SynchronizationObject: Use atomic_bool for
 is_signaled.

---
 src/core/hle/kernel/synchronization_object.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index f89b24204..7408ed51f 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <memory>
 #include <vector>
 
@@ -56,7 +57,7 @@ public:
     void ClearWaitingThreads();
 
 protected:
-    bool is_signaled{}; // Tells if this sync object is signalled;
+    std::atomic_bool is_signaled{}; // Tells if this sync object is signaled
 
 private:
     /// Threads waiting for this object to become available

From 9705f651b25ad622dfefd5c19ca147b93068cf47 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 21:28:12 -0800
Subject: [PATCH 04/10] hle: kernel: AddressArbiter: Remove unused code.

---
 src/core/hle/kernel/address_arbiter.cpp | 6 ------
 src/core/hle/kernel/address_arbiter.h   | 3 ---
 2 files changed, 9 deletions(-)

diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index b882eaa0f..048acd30e 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -275,12 +275,6 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
     return current_thread->GetSignalingResult();
 }
 
-void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
-    ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
-    RemoveThread(thread);
-    thread->SetArbiterWaitAddress(0);
-}
-
 void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
     const VAddr arb_addr = thread->GetArbiterWaitAddress();
     std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index 0b05d533c..b91edc67d 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -50,9 +50,6 @@ public:
     /// Waits on an address with a particular arbitration type.
     ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
 
-    /// Removes a thread from the container and resets its address arbiter adress to 0
-    void HandleWakeupThread(std::shared_ptr<Thread> thread);
-
 private:
     /// Signals an address being waited on.
     ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);

From c0870315fd89dfeabdbe5833d52e753a5d8417ea Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 22:49:33 -0800
Subject: [PATCH 05/10] hle: kernel: time_manager: Avoid a crash on process
 exit.

---
 src/core/hle/kernel/time_manager.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 95f2446c9..ea9089ff8 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -24,7 +24,10 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
                 return;
             }
             auto thread = this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
-            thread->OnWakeUp();
+            if (thread) {
+                // Thread can be null if process has exited
+                thread->OnWakeUp();
+            }
         });
 }
 

From 63fd1bb50302867b233325f253b1e2abbc379875 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Nov 2020 23:20:32 -0800
Subject: [PATCH 06/10] core: arm: Implement InvalidateCacheRange for CPU cache
 invalidation.

---
 src/core/arm/arm_interface.h              | 19 +++++++++++++------
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  7 +++++++
 src/core/arm/dynarmic/arm_dynarmic_32.h   |  1 +
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  7 +++++++
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  1 +
 src/core/core.cpp                         |  4 ++++
 src/core/core.h                           |  2 ++
 src/core/hle/kernel/kernel.cpp            | 15 ++++++++++-----
 src/core/hle/kernel/kernel.h              |  2 ++
 src/core/hle/kernel/memory/page_table.cpp |  5 +++++
 src/core/hle/kernel/physical_core.h       |  4 ++++
 src/core/hle/service/ldr/ldr.cpp          |  5 -----
 12 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index b3d8ceaf8..70098c526 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -70,12 +70,19 @@ public:
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
 
-    /// Notifies CPU emulation that the current page table has changed.
-    ///
-    /// @param new_page_table                 The new page table.
-    /// @param new_address_space_size_in_bits The new usable size of the address space in bits.
-    ///                                       This can be either 32, 36, or 39 on official software.
-    ///
+    /**
+     * Clear instruction cache range
+     * @param addr Start address of the cache range to clear
+     * @param size Size of the cache range to clear, starting at addr
+     */
+    virtual void InvalidateCacheRange(VAddr addr, std::size_t size) = 0;
+
+    /**
+     * Notifies CPU emulation that the current page table has changed.
+     *  @param new_page_table                 The new page table.
+     *  @param new_address_space_size_in_bits The new usable size of the address space in bits.
+     *                                        This can be either 32, 36, or 39 on official software.
+     */
     virtual void PageTableChanged(Common::PageTable& new_page_table,
                                   std::size_t new_address_space_size_in_bits) = 0;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index af23206f5..193fd7d62 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -286,6 +286,13 @@ void ARM_Dynarmic_32::ClearInstructionCache() {
     jit->ClearCache();
 }
 
+void ARM_Dynarmic_32::InvalidateCacheRange(VAddr addr, std::size_t size) {
+    if (!jit) {
+        return;
+    }
+    jit->InvalidateCacheRange(static_cast<u32>(addr), size);
+}
+
 void ARM_Dynarmic_32::ClearExclusiveState() {
     jit->ClearExclusiveState();
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index e16b689c8..35e9ced48 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -59,6 +59,7 @@ public:
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
+    void InvalidateCacheRange(VAddr addr, std::size_t size) override;
     void PageTableChanged(Common::PageTable& new_page_table,
                           std::size_t new_address_space_size_in_bits) override;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 1c9fd18b5..0f0585d0f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -322,6 +322,13 @@ void ARM_Dynarmic_64::ClearInstructionCache() {
     jit->ClearCache();
 }
 
+void ARM_Dynarmic_64::InvalidateCacheRange(VAddr addr, std::size_t size) {
+    if (!jit) {
+        return;
+    }
+    jit->InvalidateCacheRange(addr, size);
+}
+
 void ARM_Dynarmic_64::ClearExclusiveState() {
     jit->ClearExclusiveState();
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index aa0a5c424..329b59a32 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -56,6 +56,7 @@ public:
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
+    void InvalidateCacheRange(VAddr addr, std::size_t size) override;
     void PageTableChanged(Common::PageTable& new_page_table,
                           std::size_t new_address_space_size_in_bits) override;
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 58368fe3c..01e4faac8 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -457,6 +457,10 @@ void System::InvalidateCpuInstructionCaches() {
     impl->kernel.InvalidateAllInstructionCaches();
 }
 
+void System::InvalidateCpuInstructionCacheRange(VAddr addr, std::size_t size) {
+    impl->kernel.InvalidateCpuInstructionCacheRange(addr, size);
+}
+
 void System::Shutdown() {
     impl->Shutdown();
 }
diff --git a/src/core/core.h b/src/core/core.h
index f642befc0..29b8fb92a 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -166,6 +166,8 @@ public:
      */
     void InvalidateCpuInstructionCaches();
 
+    void InvalidateCpuInstructionCacheRange(VAddr addr, std::size_t size);
+
     /// Shutdown the emulated system.
     void Shutdown();
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index c426b6378..929db696d 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -497,12 +497,22 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
 }
 
 void KernelCore::InvalidateAllInstructionCaches() {
-    if (!IsMulticore()) {
-        for (auto& physical_core : impl->cores) {
-            physical_core.ArmInterface().ClearInstructionCache();
+    for (auto& physical_core : impl->cores) {
+        // Skip cores whose ARM interface has not been created yet
+        if (!physical_core.IsInitialized()) {
+            continue;
+        }
+        physical_core.ArmInterface().ClearInstructionCache();
+    }
+}
+
+void KernelCore::InvalidateCpuInstructionCacheRange(VAddr addr, std::size_t size) {
+    for (auto& physical_core : impl->cores) {
+        // Skip cores whose ARM interface has not been created yet
+        if (!physical_core.IsInitialized()) {
+            continue;
         }
-    } else {
-        UNIMPLEMENTED();
+        physical_core.ArmInterface().InvalidateCacheRange(addr, size);
     }
 }
 
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index a9fdc5860..a73a93039 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -156,6 +156,8 @@ public:
 
     void InvalidateAllInstructionCaches();
 
+    void InvalidateCpuInstructionCacheRange(VAddr addr, std::size_t size);
+
     /// Adds a port to the named port table
     void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port);
 
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp
index a3fadb533..f53a7be82 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -670,6 +670,11 @@ ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, Memo
         return RESULT_SUCCESS;
     }
 
+    if ((prev_perm & MemoryPermission::Execute) != (perm & MemoryPermission::Execute)) {
+        // Memory execution state is changing, invalidate CPU cache range
+        system.InvalidateCpuInstructionCacheRange(addr, size);
+    }
+
     const std::size_t num_pages{size / PageSize};
     const OperationType operation{(perm & MemoryPermission::Execute) != MemoryPermission::None
                                       ? OperationType::ChangePermissionsAndRefresh
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index ace058a5a..37513130a 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -58,6 +58,10 @@ public:
     // Shutdown this physical core.
     void Shutdown();
 
+    bool IsInitialized() const {
+        return arm_interface != nullptr;
+    }
+
     Core::ARM_Interface& ArmInterface() {
         return *arm_interface;
     }
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index fff68326b..9da786b4e 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -527,9 +527,6 @@ public:
                                      header.segment_headers[RO_INDEX].memory_size,
                                      header.segment_headers[DATA_INDEX].memory_size, nro_address});
 
-        // Invalidate JIT caches for the newly mapped process code
-        system.InvalidateCpuInstructionCaches();
-
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push(*map_result);
@@ -590,8 +587,6 @@ public:
 
         const auto result{UnmapNro(iter->second)};
 
-        system.InvalidateCpuInstructionCaches();
-
         nro.erase(iter);
 
         IPC::ResponseBuilder rb{ctx, 2};

From c2ad1243baaf25dcb6f9c80121c48ff6da1986cb Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 14 Nov 2020 22:37:45 -0800
Subject: [PATCH 07/10] hle: kernel: thread: Remove unused "Running" state.

---
 src/core/hle/kernel/thread.cpp  |  5 -----
 src/core/hle/kernel/thread.h    |  1 -
 src/yuzu/debugger/wait_tree.cpp | 24 +++++++++---------------
 3 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 3abe12810..7d1eb2c6e 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -88,10 +88,6 @@ void Thread::ResumeFromWait() {
         // before actually resuming. We can ignore subsequent wakeups if the thread status has
         // already been set to ThreadStatus::Ready.
         return;
-
-    case ThreadStatus::Running:
-        DEBUG_ASSERT_MSG(false, "Thread with object id {} has already resumed.", GetObjectId());
-        return;
     case ThreadStatus::Dead:
         // This should never happen, as threads must complete before being stopped.
         DEBUG_ASSERT_MSG(false, "Thread with object id {} cannot be resumed because it's DEAD.",
@@ -260,7 +256,6 @@ void Thread::SetStatus(ThreadStatus new_status) {
 
     switch (new_status) {
     case ThreadStatus::Ready:
-    case ThreadStatus::Running:
         SetSchedulingStatus(ThreadSchedStatus::Runnable);
         break;
     case ThreadStatus::Dormant:
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 20e86fb81..a75071e9b 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -72,7 +72,6 @@ enum ThreadProcessorId : s32 {
 };
 
 enum class ThreadStatus {
-    Running,      ///< Currently running
     Ready,        ///< Ready to run
     Paused,       ///< Paused by SetThreadActivity or debug
     WaitHLEEvent, ///< Waiting for hle event to finish
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 3439cb333..a20824719 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -24,7 +24,6 @@
 namespace {
 
 constexpr std::array<std::array<Qt::GlobalColor, 2>, 10> WaitTreeColors{{
-    {Qt::GlobalColor::darkGreen, Qt::GlobalColor::green},
     {Qt::GlobalColor::darkGreen, Qt::GlobalColor::green},
     {Qt::GlobalColor::darkBlue, Qt::GlobalColor::cyan},
     {Qt::GlobalColor::lightGray, Qt::GlobalColor::lightGray},
@@ -239,9 +238,6 @@ QString WaitTreeThread::GetText() const {
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     QString status;
     switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Running:
-        status = tr("running");
-        break;
     case Kernel::ThreadStatus::Ready:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
@@ -298,34 +294,32 @@ QColor WaitTreeThread::GetColor() const {
 
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     switch (thread.GetStatus()) {
-    case Kernel::ThreadStatus::Running:
-        return QColor(WaitTreeColors[0][color_index]);
     case Kernel::ThreadStatus::Ready:
         if (!thread.IsPaused()) {
             if (thread.WasRunning()) {
-                return QColor(WaitTreeColors[1][color_index]);
+                return QColor(WaitTreeColors[0][color_index]);
             } else {
-                return QColor(WaitTreeColors[2][color_index]);
+                return QColor(WaitTreeColors[1][color_index]);
             }
         } else {
-            return QColor(WaitTreeColors[3][color_index]);
+            return QColor(WaitTreeColors[2][color_index]);
         }
     case Kernel::ThreadStatus::Paused:
-        return QColor(WaitTreeColors[4][color_index]);
+        return QColor(WaitTreeColors[3][color_index]);
     case Kernel::ThreadStatus::WaitHLEEvent:
     case Kernel::ThreadStatus::WaitIPC:
-        return QColor(WaitTreeColors[5][color_index]);
+        return QColor(WaitTreeColors[4][color_index]);
     case Kernel::ThreadStatus::WaitSleep:
-        return QColor(WaitTreeColors[6][color_index]);
+        return QColor(WaitTreeColors[5][color_index]);
     case Kernel::ThreadStatus::WaitSynch:
     case Kernel::ThreadStatus::WaitMutex:
     case Kernel::ThreadStatus::WaitCondVar:
     case Kernel::ThreadStatus::WaitArb:
-        return QColor(WaitTreeColors[7][color_index]);
+        return QColor(WaitTreeColors[6][color_index]);
     case Kernel::ThreadStatus::Dormant:
-        return QColor(WaitTreeColors[8][color_index]);
+        return QColor(WaitTreeColors[7][color_index]);
     case Kernel::ThreadStatus::Dead:
-        return QColor(WaitTreeColors[9][color_index]);
+        return QColor(WaitTreeColors[8][color_index]);
     default:
         return WaitTreeItem::GetColor();
     }

From 24cae76d16d7344154c1a507889e33793b369be7 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 15 Nov 2020 00:36:26 -0800
Subject: [PATCH 08/10] common: fiber: Use VirtualBuffer for stack memory.

- The stack memory will now be aligned by default, and this also helps memory usage.
---
 src/common/fiber.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 3978c8624..3c1eefcb7 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -5,6 +5,7 @@
 #include "common/assert.h"
 #include "common/fiber.h"
 #include "common/spin_lock.h"
+#include "common/virtual_buffer.h"
 
 #include <boost/context/detail/fcontext.hpp>
 
@@ -13,8 +14,10 @@ namespace Common {
 constexpr std::size_t default_stack_size = 256 * 1024;
 
 struct Fiber::FiberImpl {
-    alignas(64) std::array<u8, default_stack_size> stack;
-    alignas(64) std::array<u8, default_stack_size> rewind_stack;
+    FiberImpl() : stack{default_stack_size}, rewind_stack{default_stack_size} {}
+
+    VirtualBuffer<u8> stack;
+    VirtualBuffer<u8> rewind_stack;
 
     SpinLock guard{};
     std::function<void(void*)> entry_point;

From b7ef581c6e19d0159206aa42e1e03de461a77d7e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2020 16:19:00 -0800
Subject: [PATCH 09/10] kernel: time_manager: Protect access with a mutex.

---
 src/core/hle/kernel/time_manager.cpp | 4 +++-
 src/core/hle/kernel/time_manager.h   | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index ea9089ff8..caf329bfb 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -32,6 +32,7 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
 }
 
 void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) {
+    std::lock_guard lock{mutex};
     event_handle = timetask->GetGlobalHandle();
     if (nanoseconds > 0) {
         ASSERT(timetask);
@@ -46,6 +47,7 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64
 }
 
 void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
+    std::lock_guard lock{mutex};
     if (event_handle == InvalidHandle) {
         return;
     }
@@ -54,7 +56,7 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
 }
 
 void TimeManager::CancelTimeEvent(Thread* time_task) {
-    Handle event_handle = time_task->GetGlobalHandle();
+    const Handle event_handle = time_task->GetGlobalHandle();
     UnscheduleTimeEvent(event_handle);
 }
 
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
index 307a18765..f39df39a0 100644
--- a/src/core/hle/kernel/time_manager.h
+++ b/src/core/hle/kernel/time_manager.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include <unordered_map>
 
 #include "core/hle/kernel/object.h"
@@ -42,6 +43,7 @@ private:
     Core::System& system;
     std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
     std::unordered_map<Handle, bool> cancelled_events;
+    std::mutex mutex;
 };
 
 } // namespace Kernel

From 4b9e1b6586a8a4017b8e3e0fb52457d1e2568066 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 18 Nov 2020 16:52:47 -0800
Subject: [PATCH 10/10] kernel: scheduler: Minor cleanup to remove duplicated
 code.

---
 src/core/hle/kernel/scheduler.cpp | 58 +++++++------------------------
 src/core/hle/kernel/scheduler.h   |  2 ++
 2 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 0805e9914..5c63b0b4a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -618,8 +618,7 @@ void Scheduler::OnThreadStart() {
     SwitchContextStep2();
 }
 
-void Scheduler::Unload() {
-    Thread* thread = current_thread.get();
+void Scheduler::Unload(Thread* thread) {
     if (thread) {
         thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         thread->SetIsRunning(false);
@@ -639,8 +638,11 @@ void Scheduler::Unload() {
     }
 }
 
-void Scheduler::Reload() {
-    Thread* thread = current_thread.get();
+void Scheduler::Unload() {
+    Unload(current_thread.get());
+}
+
+void Scheduler::Reload(Thread* thread) {
     if (thread) {
         ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
                    "Thread must be runnable.");
@@ -665,30 +667,13 @@ void Scheduler::Reload() {
     }
 }
 
+void Scheduler::Reload() {
+    Reload(current_thread.get());
+}
+
 void Scheduler::SwitchContextStep2() {
     // Load context of new thread
-    if (selected_thread) {
-        ASSERT_MSG(selected_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
-                   "Thread must be runnable.");
-
-        // Cancel any outstanding wakeup events for this thread
-        selected_thread->SetIsRunning(true);
-        selected_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
-        selected_thread->SetWasRunning(false);
-
-        auto* const thread_owner_process = current_thread->GetOwnerProcess();
-        if (thread_owner_process != nullptr) {
-            system.Kernel().MakeCurrentProcess(thread_owner_process);
-        }
-        if (!selected_thread->IsHLEThread()) {
-            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-            cpu_core.LoadContext(selected_thread->GetContext32());
-            cpu_core.LoadContext(selected_thread->GetContext64());
-            cpu_core.SetTlsAddress(selected_thread->GetTLSAddress());
-            cpu_core.SetTPIDR_EL0(selected_thread->GetTPIDR_EL0());
-            cpu_core.ClearExclusiveState();
-        }
-    }
+    Reload(selected_thread.get());
 
     TryDoContextSwitch();
 }
@@ -712,26 +697,7 @@ void Scheduler::SwitchContext() {
     UpdateLastContextSwitchTime(previous_thread, previous_process);
 
     // Save context for previous thread
-    if (previous_thread) {
-        if (new_thread != nullptr && new_thread->IsSuspendThread()) {
-            previous_thread->SetWasRunning(true);
-        }
-        previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
-        previous_thread->SetIsRunning(false);
-        if (previous_thread->IsContinuousOnSVC() && !previous_thread->IsHLEThread()) {
-            system.ArmInterface(core_id).ExceptionalExit();
-            previous_thread->SetContinuousOnSVC(false);
-        }
-        if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) {
-            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-            cpu_core.SaveContext(previous_thread->GetContext32());
-            cpu_core.SaveContext(previous_thread->GetContext64());
-            // Save the TPIDR_EL0 system register in case it was modified.
-            previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
-            cpu_core.ClearExclusiveState();
-        }
-        previous_thread->context_guard.unlock();
-    }
+    Unload(previous_thread);
 
     std::shared_ptr<Common::Fiber>* old_context;
     if (previous_thread != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index b6f04dcea..68db4a5ef 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -212,8 +212,13 @@ public:
 
+    /// Unloads the given thread from this core. A null thread is ignored.
+    void Unload(Thread* thread);
+    /// Reloads the given thread on this core. A null thread is ignored.
+    void Reload(Thread* thread);
+
     /// The next two are for SingleCore Only.
     /// Unload current thread before preempting core.
     void Unload();
     /// Reload current thread after core preemption.
     void Reload();
 
     /// Gets the current running thread