Use the PAT MSR to map the framebuffer as WC (write-combining): huge speed difference on real hardware

This commit is contained in:
2026-03-19 19:34:31 +01:00
parent 6a82d581fb
commit 424b4c4632
6 changed files with 133 additions and 2 deletions

13
include/arch/x86.h Normal file
View File

@@ -0,0 +1,13 @@
#ifndef X86_H
#define X86_H

#include <stdbool.h>
#include <stdint.h>

/* Execute CPUID for @leaf and store the resulting EAX/EBX/ECX/EDX values. */
void cpuid(uint32_t leaf, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx);

/* Read the 64-bit value of the Model Specific Register numbered @msr. */
uint64_t rdmsr(uint32_t msr);

/* Write the 64-bit @value to the Model Specific Register numbered @msr. */
void wrmsr(uint32_t msr, uint64_t value);

/* Report whether CPUID advertises MSR support (CPUID.01h:EDX bit 5). */
bool x86_has_msr(void);

/* x86-specific early initialization (currently: PAT1 set to Write-Combining). */
void x86_arch_init(void);

#endif

20
src/arch/x86/cpuid.c Normal file
View File

@@ -0,0 +1,20 @@
/*
* @author xamidev <xamidev@riseup.net>
* @brief x86 CPU identification
* @license GPL-3.0-only
*/
#include <stdint.h>
/*
 * cpuid - Wrapper for CPUID instruction
 * @leaf: Requested leaf (loaded into EAX before executing CPUID)
 * @eax: Receives the EAX register value
 * @ebx: Receives the EBX register value
 * @ecx: Receives the ECX register value
 * @edx: Receives the EDX register value
 *
 * ECX is explicitly zeroed before the instruction runs. Several
 * leaves (e.g. 04h, 07h, 0Bh, 0Dh) use ECX as a sub-leaf index, so
 * leaving it uninitialized would make their results depend on
 * whatever the register happened to contain. With ECX = 0 this
 * wrapper always queries sub-leaf 0.
 *
 * All four output pointers must be valid; they are written
 * unconditionally.
 */
void cpuid(uint32_t leaf, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx)
{
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;

    __asm__ volatile("cpuid"
                     : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                     : "a"(leaf), "c"(0u));

    *eax = a;
    *ebx = b;
    *ecx = c;
    *edx = d;
}

28
src/arch/x86/init.c Normal file
View File

@@ -0,0 +1,28 @@
/*
* @author xamidev <xamidev@riseup.net>
* @brief x86 architecture-dependent initialization
* @license GPL-3.0-only
*/
#include <stdint.h>
#include <arch/x86.h>
#include <kernel.h>
/*
 * x86_arch_init - x86-specific early initialization
 *
 * For now this only reprograms entry 1 of the Page Attribute Table
 * (MSR 0x277) to memory type 0x01 (Write-Combining), leaving every
 * other PAT entry as it was read.
 *
 * The long-term plan is for all architecture-dependent setup
 * (GDT, IDT, TSS, ...) to live here, with per-architecture
 * function pointers selecting the right implementation.
 *
 * NOTE(review): the Intel SDM describes a cache/TLB flush procedure
 * for PAT changes; confirm whether it is needed at this point of boot.
 */
void x86_arch_init()
{
    enum {
        PAT_MSR_INDEX = 0x277, // IA32_PAT register number
        PAT1_SHIFT = 8         // each PAT entry occupies one byte; entry 1 starts at bit 8
    };
    const uint64_t pat1_field = 0xFFULL << PAT1_SHIFT;
    const uint64_t pat1_wc = 0x01ULL << PAT1_SHIFT;

    // Read-modify-write so the other seven entries are preserved
    const uint64_t current = rdmsr(PAT_MSR_INDEX);
    const uint64_t updated = (current & ~pat1_field) | pat1_wc;

    wrmsr(PAT_MSR_INDEX, updated);
    DEBUG("Overrode PAT1 entry to set up Write-Combining");
}

66
src/arch/x86/msr.c Normal file
View File

@@ -0,0 +1,66 @@
/*
* @author xamidev <xamidev@riseup.net>
* @brief x86 MSR C wrappers
* @description
* Wrapper functions to access Model Specific Registers
*
* @license GPL-3.0-only
*/
#include <stdint.h>
#include <stdbool.h>
#include <arch/x86.h>
/*
 * rdmsr - Read from MSR
 * @msr: model specific register number (passed in ECX)
 *
 * Executes the "rdmsr" instruction, which delivers the
 * register contents split across EDX (upper 32 bits) and
 * EAX (lower 32 bits), and glues both halves back into a
 * single 64-bit value.
 *
 * Return:
 * <value> - 64-bit value read from the MSR
 */
uint64_t rdmsr(uint32_t msr)
{
    uint32_t eax_half;
    uint32_t edx_half;

    __asm__ volatile("rdmsr"
                     : "=a"(eax_half), "=d"(edx_half)
                     : "c"(msr));

    uint64_t value = edx_half;
    value <<= 32;
    value |= eax_half;
    return value;
}
/*
 * wrmsr - Write to MSR
 * @msr: model specific register number (passed in ECX)
 * @value: 64-bit value to store
 *
 * Executes the "wrmsr" instruction. The value is split
 * into its lower 32 bits (EAX) and upper 32 bits (EDX),
 * as the instruction expects.
 */
void wrmsr(uint32_t msr, uint64_t value)
{
    const uint32_t eax_half = (uint32_t)value;         // truncation keeps bits 31:0
    const uint32_t edx_half = (uint32_t)(value >> 32); // bits 63:32

    __asm__ volatile("wrmsr"
                     :
                     : "c"(msr), "a"(eax_half), "d"(edx_half)
                     : "memory");
}
/*
 * x86_has_msr - Test for MSR support
 *
 * Queries CPUID leaf 01h and inspects bit 5 of EDX, the
 * feature flag advertising Model Specific Register support.
 *
 * Return:
 * true - MSRs are supported
 * false - MSRs are not supported
 */
bool x86_has_msr()
{
    enum { MSR_FEATURE_BIT = 5 };
    uint32_t regs[4]; // EAX, EBX, ECX, EDX in that order

    cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);

    return ((regs[3] >> MSR_FEATURE_BIT) & 1u) == 1u;
}

View File

@@ -4,6 +4,7 @@
* @license GPL-3.0-only
*/
#include "arch/x86.h"
#include <stdbool.h>
#include <stddef.h>
#include <limine.h>
@@ -24,6 +25,7 @@
#include <config.h>
#include <io/term/flanterm.h>
#include <io/term/flanterm_backends/fb.h>
#include <arch/x86.h>
// Limine version used
__attribute__((used, section(".limine_requests")))
@@ -112,6 +114,8 @@ void kmain()
serial_init();
timer_init();
x86_arch_init();
boot_mem_display();
pmm_init(boot_ctx);

View File

@@ -202,9 +202,9 @@ void paging_init(struct boot_context boot_ctx)
uint64_t fb_size = fb->pitch * fb->height;
uint64_t fb_pages = (fb_size + PAGE_SIZE-1)/PAGE_SIZE;
// Map the framebuffer (with cache-disable & write-through)
// Map the framebuffer (PWT set, and no PCD means PAT1 [Write-Combining] for this region)
for (uint64_t i=0; i<fb_pages; i++) {
paging_map_page(kernel_pml4, fb_virt+i*PAGE_SIZE, fb_phys+i*PAGE_SIZE, PTE_WRITABLE | PTE_PCD | PTE_PWT);
paging_map_page(kernel_pml4, fb_virt+i*PAGE_SIZE, fb_phys+i*PAGE_SIZE, PTE_WRITABLE | PTE_PWT);
page_count++;
}
DEBUG("Mapped %u pages for framebuffer", page_count);