diff --git a/docs/RegisterAllocator.md b/docs/RegisterAllocator.md
new file mode 100644
index 00000000..b182d803
--- /dev/null
+++ b/docs/RegisterAllocator.md
@@ -0,0 +1,85 @@
+# Register Allocation (x64 Backend)
+
+`HostLoc`s contain values. A `HostLoc` ("host value location") is either a host CPU register or a host spill location.
+
+Values once set cannot be changed. Values can however be moved by the register allocator between `HostLoc`s. This is handled by the register allocator itself and code that uses the register allocator need not and should not move values between registers.
+
+The register allocator is based on three concepts: `Use`, `Def` and `Scratch`.
+
+* `Use`: The use of a value.
+* `Def`: The definition of a value, this is the only time when a value is set.
+* `Scratch`: Allocate a register that can be freely modified as one wishes.
+
+Note that `Use`ing a value decrements its `use_count` by one. When the `use_count` reaches zero the value is discarded and no longer exists.
+
+The member functions on `RegAlloc` are just a combination of the above concepts.
+
+### `Scratch`
+
+    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
+    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
+
+At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
+
+### Pure `Use`
+
+    Xbyak::Reg64 UseGpr(IR::Value use_value, HostLocList desired_locations = any_gpr);
+    Xbyak::Xmm UseXmm(IR::Value use_value, HostLocList desired_locations = any_xmm);
+    OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations);
+
+At runtime, the value corresponding to `use_value` will be placed into one of the `HostLoc`s specified by `desired_locations`. The return value is the actual location.
+
+This register **must not** have its value changed.
+
+* `UseGpr`: The location is a GPR.
+* `UseXmm`: The location is an XMM register.
+* `UseOpArg`: The location may be one of the locations specified by `desired_locations`, but may also be a host memory reference.
+
+### `UseScratch`
+
+    Xbyak::Reg64 UseScratchGpr(IR::Value use_value, HostLocList desired_locations = any_gpr)
+    Xbyak::Xmm UseScratchXmm(IR::Value use_value, HostLocList desired_locations = any_xmm)
+
+At runtime, the value corresponding to `use_value` will be placed into one of the `HostLoc`s specified by `desired_locations`. The return value is the actual location.
+
+You are free to modify the register. The register is discarded at the end of the allocation scope.
+
+### `Def`
+
+A `Def` is the definition of a value. This is the only time when a value may be set.
+
+    Xbyak::Xmm DefXmm(IR::Inst* def_inst, HostLocList desired_locations = any_xmm)
+    Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr)
+
+By calling `DefXmm` or `DefGpr`, you are stating that you wish to define the value for `def_inst`, and you wish to write the value to one of the `HostLoc`s specified by `desired_locations`. You must write the value to the register returned.
+
+### `AddDef`
+
+Adding a `Def` to an existing value.
+
+    void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst);
+
+You are declaring that the value for `def_inst` is the same as the value for `use_inst`. No host machine instructions are emitted.
+
+### `UseDef`
+
+    Xbyak::Reg64 UseDefGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr)
+    Xbyak::Xmm UseDefXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm)
+
+At runtime, the value corresponding to `use_value` will be placed into one of the `HostLoc`s specified by `desired_locations`. The return value is the actual location. You must write the value corresponding to `def_inst` by the end of the allocation scope.
+
+### `UseDef` (OpArg variant)
+
+    std::tuple<OpArg, Xbyak::Reg64> UseDefOpArgGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr)
+    std::tuple<OpArg, Xbyak::Xmm> UseDefOpArgXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm)
+
+These have the same semantics as `UseDefGpr` and `UseDefXmm` except `use_value` may not be present in the register, and may actually be in a host memory location.
+
+## When to use each?
+
+The variety of different ways to `Use` and `Def` values are for performance reasons.
+
+* `UseDef`: Instead of performing a `Use` and a `Def`, `UseDef` uses one less register in the case when this `Use` is the last `Use` of a value.
+* `UseScratch`: Instead of performing a `Use` and a `Scratch`, `UseScratch` uses one less register in the case when this `Use` is the last `Use` of a value.
+* `AddDef`: This drastically reduces the number of registers required when it can be used. It can be used when values are truncations of other values. For example, if `u8_value` contains the truncation of `u32_value`, `AddDef(u8_value, u32_value)` is a valid definition of `u8_value`.
+* OpArg variants: Save host code-cache by merging memory loads into other instructions instead of the register allocator having to emit a `mov`.
\ No newline at end of file
diff --git a/docs/ReturnStackBufferOptimization.md b/docs/ReturnStackBufferOptimization.md
new file mode 100644
index 00000000..caf2f423
--- /dev/null
+++ b/docs/ReturnStackBufferOptimization.md
@@ -0,0 +1,147 @@
+# Return Stack Buffer Optimization (x64 Backend)
+
+One of the optimizations that dynarmic does is block-linking. Block-linking is done when
+the destination address of a jump is available at JIT-time. Instead of returning to the
+dispatcher at the end of a block we can perform block-linking: just jump directly to the
+next block. This is beneficial because returning to the dispatcher can often be quite
+expensive.
+
+What should we do in cases when we can't predict the destination address? The classic
+example is when executing a return statement at the end of a function; the return address
+is not statically known at compile time.
+
+We deal with this by using a return stack buffer: When we execute a call instruction,
+we push our prediction onto the RSB. When we execute a return instruction, we pop a
+prediction off the RSB. If the prediction is a hit, we immediately jump to the relevant
+compiled block. Otherwise, we return to the dispatcher.
+
+This is the essential idea behind this optimization.
+
+## `UniqueHash`
+
+One complication dynarmic has is that a compiled block is not uniquely identifiable by
+the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
+computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
+
+    u64 LocationDescriptor::UniqueHash() const {
+        // This value MUST BE UNIQUE.
+        // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
+        u64 pc_u64 = u64(arm_pc);
+        u64 fpscr_u64 = u64(fpscr.Value()) << 32;
+        u64 t_u64 = cpsr.T() ? (1ull << 35) : 0;
+        u64 e_u64 = cpsr.E() ? (1ull << 39) : 0;
+        return pc_u64 | fpscr_u64 | t_u64 | e_u64;
+    }
+
+## Our implementation isn't actually a stack
+
+Dynarmic's RSB isn't actually a stack. It was implemented as a ring buffer because
+that showed better performance in tests.
+
+### RSB Structure
+
+The RSB is implemented as a ring buffer. `rsb_ptr` is the index of the insertion
+point. Each element in `rsb_location_descriptors` is a `UniqueHash` and they
+each correspond to an element in `rsb_codeptrs`. `rsb_codeptrs` contains the
+host addresses of the corresponding compiled blocks.
+
+`RSBSize` was chosen by performance testing. Note that this is bigger than the
+size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
+showed degraded performance.
+
+    struct JitState {
+        // ...
+
+        static constexpr size_t RSBSize = 8; // MUST be a power of 2.
+        u32 rsb_ptr = 0;
+        std::array<u64, RSBSize> rsb_location_descriptors;
+        std::array<u64, RSBSize> rsb_codeptrs;
+        void ResetRSB();
+
+        // ...
+    };
+
+### RSB Push
+
+We insert our prediction at the insertion point iff the RSB doesn't already
+contain a prediction with the same `UniqueHash`.
+
+    void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
+        using namespace Xbyak::util;
+
+        ASSERT(inst->GetArg(0).IsImmediate());
+        u64 imm64 = inst->GetArg(0).GetU64();
+
+        Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
+        Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
+        Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
+        u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
+                        ? u64(unique_hash_to_code_ptr[imm64])
+                        : u64(code->GetReturnFromRunCodeAddress());
+
+        code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
+        code->add(index_reg, 1);
+        code->and_(index_reg, u32(JitState::RSBSize - 1));
+
+        code->mov(loc_desc_reg, u64(imm64));
+        CodePtr patch_location = code->getCurr<CodePtr>();
+        patch_unique_hash_locations[imm64].emplace_back(patch_location);
+        code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
+        code->EnsurePatchLocationSize(patch_location, 10);
+
+        Xbyak::Label label;
+        for (size_t i = 0; i < JitState::RSBSize; ++i) {
+            code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+            code->je(label, code->T_SHORT);
+        }
+
+        code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
+        code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+        code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+        code->L(label);
+    }
+
+In pseudocode:
+
+      for (i := 0 .. RSBSize-1)
+          if (rsb_location_descriptors[i] == imm64)
+            goto label;
+      rsb_ptr++;
+      rsb_ptr %= RSBSize;
+      rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
+      rsb_codeptrs[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
+    label:
+
+## RSB Pop
+
+To check if a prediction is in the RSB, we linearly scan the RSB.
+
+    void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
+        using namespace Xbyak::util;
+
+        // This calculation has to match up with IREmitter::PushRSB
+        code->mov(ebx, MJitStateCpsr());
+        code->mov(ecx, MJitStateReg(Arm::Reg::PC));
+        code->and_(ebx, u32((1 << 5) | (1 << 9)));
+        code->shr(ebx, 2);
+        code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
+        code->shl(rbx, 32);
+        code->or_(rbx, rcx);
+
+        code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
+        for (size_t i = 0; i < JitState::RSBSize; ++i) {
+            code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+            code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
+        }
+
+        code->jmp(rax);
+    }
+
+In pseudocode:
+
+    rbx := ComputeUniqueHash()
+    rax := ReturnToDispatch
+    for (i := 0 .. RSBSize-1)
+       if (rbx == rsb_location_descriptors[i])
+          rax = rsb_codeptrs[i]
+    goto rax
\ No newline at end of file