Xen源代码分析(一)——head.s
启动汇编部分代码是xen的引导启动程序,位于./xen/arch/x86/boot目录下。代码描述了从xen加载到调用第一个C函数“__start_xen”之间的初始化系统环境过程。主要涉及的文件流程为head.S->trampoline.S->x86_32.S,其中head.S为从GRUB进入XEN的入口文件,首先看看head.S部分都做了什么(只看32位体系)。
/* 只能由 grub来引导,head.S 是从GRUB进入XEN的入口文件;
开始执行的第一个汇编文件,包括初始化页表,解析早期命令行参数等工作
*/
#include <xen/config.h>
#include <xen/multiboot.h>
#include <public/xen.h>
#include <asm/asm_defns.h>
#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
.text
.code32
/* Per the original annotation: the CPU is already in protected mode when Xen
   starts running and, as in the Linux kernel, a virtual address equals the
   physical address plus a fixed offset. */
/* __XEN_VIRT_START is not defined in this file; the original annotation
   locates its definition in xen/include/asm-x86/x86_32/page.h. */
/* sym_phys(sym): subtract __XEN_VIRT_START from the link-time address of a
   symbol. The boot code below uses the result for every memory reference it
   makes before paging is turned on. */
#define sym_phys(sym) ((sym) - __XEN_VIRT_START)
/**
*xen 编译时的映像布局由xen\arch\x86\xen.lds.S控制:
...
#ifdef __x86_64__
#define FORMAT "elf64-x86-64"
#else
#define FORMAT "elf32-i386"
#endif
ENTRY(start)
#endif
OUTPUT_FORMAT(FORMAT, FORMAT, FORMAT)
#ifdef __x86_64__
OUTPUT_ARCH(i386:x86-64)
#else
OUTPUT_ARCH(i386)
#endif
PHDRS
{
text PT_LOAD ;
}
SECTIONS
{
. = __XEN_VIRT_START + 0x100000;
_start = .;
.text : {
_stext = .; //Text and read-only data
*(.text)
*(.text.cold)
*(.text.unlikely)
*(.fixup)
*(.gnu.warning)
_etext = .; //End of text section
} :text = 0x9090
...
**/
/* Segment selectors used during boot. Per the original annotation (citing the
   Intel manual), the first GDT entry is unusable, so the selectors start at
   0x08 and step by 8. The descriptors themselves live in trampoline_gdt, which
   is outside this view; the per-selector descriptions below are carried over
   from the original annotations. */
/* ring 0, code, 32-bit mode */
#define BOOT_CS32 0x0008
/* ring 0, code, 64-bit mode */
#define BOOT_CS64 0x0010
/* ring 0, data */
#define BOOT_DS 0x0018
/* real-mode code */
#define BOOT_PSEUDORM_CS 0x0020
/* real-mode data */
#define BOOT_PSEUDORM_DS 0x0028
/* Image entry point: jump straight over the Multiboot header that follows
   and continue at __start. */
ENTRY(start)
jmp __start
.align 4
/*** MULTIBOOT HEADER ****/
/* Value emitted in the flags word of the header below. */
#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
MULTIBOOT_HEADER_WANT_MEMORY)
/* Magic number indicating a Multiboot header. */
.long MULTIBOOT_HEADER_MAGIC
/* Flags to bootloader (see Multiboot spec). */
.long MULTIBOOT_HEADER_FLAGS
/* Checksum: must be the negated sum of the first two fields. */
.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
/**
* The three words above are there for GRUB: they declare that this image
* supports Multiboot. See the Multiboot specification for details.
**/
.section .init.text, "ax"
/*.asciz is just like .ascii, but each string is followed by a zero
byte.*/
/* NUL-terminated messages used by the two fatal-error paths below. */
.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
bad_cpu:/* Fatal path taken when the long-mode check in __start fails: select the bad-CPU message and print it. */
mov $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
jmp print_err
not_multiboot:/* Fatal path taken when EAX does not hold the Multiboot magic: select the message and fall through into print_err. */
mov $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
print_err:/* Print the NUL-terminated string at ESI the most basic way possible: each
character is sent to the UART at I/O base 0x3f8 and also written, with
attribute byte 7, to the VGA text buffer at 0xB8000. Register use: ESI = next
character, EDI = next framebuffer cell, BL = current character. Never returns:
once the terminator is reached the code spins forever. */
mov $0xB8000,%edi # VGA framebuffer
1: mov (%esi),%bl
test %bl,%bl # Terminate on '\0' sentinel
/* With ZF set this branches to itself, so execution parks here for good. */
2: je 2b
mov $0x3f8+5,%dx # UART Line Status Register
/* Poll until the transmitter can accept another byte. */
3: in %dx,%al
test $0x20,%al # Test THR Empty flag
je 3b
mov $0x3f8+0,%dx # UART Transmit Holding Register
mov %bl,%al
out %al,%dx # Send a character over the serial line
/* movsb copies the character from (ESI) to (EDI) and advances both pointers;
   stosb then stores AL (the attribute) and advances EDI again. */
movsb # Write a character to the VGA framebuffer
mov $7,%al
stosb # Write an attribute to the VGA framebuffer
jmp 1b
gdt_boot_descr:/* Operand for the lgdt at the top of __start. In legacy mode the GDTR image is
48 bits: a 16-bit limit followed by a 32-bit base address. Segment descriptors
are always 8 bytes, so the limit has the form 8*N-1. The limit used here,
6*8-1, covers six descriptors; per the original annotation that is how many
trampoline_gdt defines (trampoline_gdt itself is outside this view). */
.word 6*8-1
.long sym_phys(trampoline_gdt)
/*
 * __start: 32-bit boot path, reached from the jmp at `start`.
 *
 * On entry (Multiboot machine state, summarised further down):
 *   EAX = bootloader magic, EBX = physical address of the Multiboot info.
 *
 * Steps, in order:
 *   1. load the boot GDT and the data segment registers;
 *   2. verify the Multiboot magic;
 *   3. choose the low-memory address of the trampoline;
 *   4. call reloc on the Multiboot info and save the returned pointer;
 *   5. zero the BSS;
 *   6. read the CPUID extended feature flags;
 *   7. build the initial page tables;
 *   8. patch the trampoline with its final address and segment;
 *   9. call cmdline_parse_early;
 *  10. copy the trampoline to low memory and far-return into it.
 *
 * Does not return. All symbol references go through sym_phys().
 */
__start:
cld
cli
/* Initialise GDT and basic data segments. */
lgdt %cs:sym_phys(gdt_boot_descr)
mov $BOOT_DS,%ecx
mov %ecx,%ds
mov %ecx,%es
mov %ecx,%ss
/*
Validate and store the Multiboot information; see the Multiboot specification.
When a boot loader starts a 32-bit operating system the machine state must be:
EAX:
must contain the magic value 0x2BADB002, which tells the operating system that
it was loaded by a Multiboot-compliant boot loader.
EBX:
must contain the 32-bit physical address of the Multiboot information
structure provided by the boot loader.
CS:
must be a 32-bit read/execute code segment with an offset of 0 and a limit of
0xFFFFFFFF. The exact value is undefined.
SS:
must be a 32-bit read/write data segment with an offset of 0 and a limit of
0xFFFFFFFF. The exact value is undefined.
A20 GATE:
must be enabled.
CR0:
bit 31 (PG) must be cleared and bit 0 (PE) must be set. Other bits are
undefined.
EFLAGS:
bit 17 (VM) must be cleared and bit 9 (IF) must be cleared. Other bits are
undefined.
*/
/* Check for Multiboot bootloader */
cmp $0x2BADB002,%eax
jne not_multiboot
/* Set up trampoline segment 64k below EBDA */
/* EAX holds a real-mode segment value (16-byte units) until the shl $4 below.
   The EBDA segment is accepted only when it lies in [0x4000, 0xa000). */
movzwl 0x40e,%eax /* EBDA segment */
cmp $0xa000,%eax /* sanity check (high) */
jae 0f
cmp $0x4000,%eax /* sanity check (low) */
jae 1f
0:
/* Fallback. The shift by 10-4 turns the word at 0x413 into 16-byte units,
   which implies that word is a size in KiB. */
movzwl 0x413,%eax /* use base memory size on failure */
shl $10-4,%eax
1:
/* 0x1000 segment units = 64 KiB. */
sub $0x1000,%eax
/* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
/* Clearing the low 8 bits of the segment rounds it down to a multiple of 256
   paragraphs (4 KiB); shifting left by 4 then yields the physical address. */
xor %al, %al
shl $4, %eax
mov %eax,sym_phys(trampoline_phys)
/* Save the Multiboot info struct (after relocation) for later use. */
/* Temporary stack inside cpu0_stack. EBX (the Multiboot info pointer) is
   pushed before the call; the value reloc leaves in EAX is saved in
   multiboot_ptr. reloc itself comes from reloc.S and is not shown here. */
mov $sym_phys(cpu0_stack)+1024,%esp
push %ebx
call reloc
mov %eax,sym_phys(multiboot_ptr)
/* Initialize BSS (no nasty surprises!) */
/* Zero the BSS, which holds the program's uninitialised global variables.
Per the original annotation the BSS bounds are defined in
xen/arch/x86/x86_32/xen.lds.S. EDI = start, ECX = byte count (_end minus
__bss_start), AL = 0. */
mov $sym_phys(__bss_start),%edi
mov $sym_phys(_end),%ecx
sub %edi,%ecx
xor %eax,%eax
rep stosb
/*
Query and save the CPU extended feature information.
The CPUID instruction reports full information about the processor
implementation and its capabilities; software at any privilege level may use
it. The EAX register selects which information CPUID returns.
EAX = 0x80000000 returns:
EAX: Maximum Input Value for Extended Function CPUID Information. (Original
annotation: greater than 0x80000000 on every CPU from the Pentium 4 onward.)
EBX: Reserved
ECX: Reserved
EDX: Reserved
EAX = 0x80000001 returns:
EAX: Extended Processor Signature and Feature Bits.
EBX: Reserved
ECX: Bit 0: LAHF/SAHF available in 64-bit mode
Bits 31-1 Reserved
EDX: Bits 10-0: Reserved
Bit 11: SYSCALL/SYSRET available (when in 64-bit mode)
Bits 19-12: Reserved = 0
Bit 20: Execute Disable Bit available
Bits 28-21: Reserved = 0
Bit 29: Intel 64 Architecture available if 1
Bits 31-30: Reserved = 0
Per the original annotation: cpuid_ext_features is defined in
xen/arch/x86/boot/trampoline.S; boot_cpu_data is declared in
xen/include/asm-x86/processor.h and is an instance of struct cpuinfo_x86;
CPUINFO86_ext_features is generated in xen/arch/x86/x86_32/asm-offsets.c by
OFFSET(CPUINFO86_ext_features, struct cpuinfo_x86, x86_capability[1]);
How OFFSET works:
#define offsetof(s, m) (size_t)&(((s*)0)->m)
m is a member of struct s; the result is the offset of m from the start of
the struct. ANSI C allows the constant 0 to be converted to any pointer type,
giving a null pointer. The expression &(((s*)0)->m) never accesses m; it only
takes its address, so the compiler generates no access to m.
#define DEFINE(_sym,_val) __asm__ __volatile__ ("\n->" #_sym " %0 " #_val :: "i"(_val))
Here # is the preprocessor stringizing operator (it turns the macro argument
into a string literal); %0 is a placeholder that stands for the "i"(_val)
operand.
#define OFFSET(_sym, _str, _mem) DEFINE(_sym, offsetof(_str, _mem))
This macro makes _sym stand for the offset of member _mem within struct _str.
*/
/* Interrogate CPU extended features via CPUID. */
/* EDX is zeroed after the first CPUID so that, when leaf 0x80000001 is not
   available, zero is what gets stored as the feature word. */
mov $0x80000000,%eax
cpuid
xor %edx,%edx
cmp $0x80000000,%eax # any function > 0x80000000?
jbe 1f
mov $0x80000001,%eax
cpuid
1: mov %edx,sym_phys(cpuid_ext_features)
mov %edx,sym_phys(boot_cpu_data)+CPUINFO86_ext_features
#if defined(__x86_64__)
/* Check for availability of long mode. */
/* Bit 29 of the extended feature word (see the table above). */
bt $29,%edx
jnc bad_cpu
/* Initialise L2 identity-map and xen page table entries (16MB). */
/* Eight entries, 8 bytes apart, with the same value going into all three L2
   tables. Only the low 32 bits of each entry are written here. */
mov $sym_phys(l2_identmap),%edi
mov $sym_phys(l2_xenmap),%esi
mov $sym_phys(l2_bootmap),%edx
mov $0x1e3,%eax /* PRESENT+RW+A+D+2MB+GLOBAL */
mov $8,%ecx
1: mov %eax,(%edi)
add $8,%edi
mov %eax,(%esi)
add $8,%esi
mov %eax,(%edx)
add $8,%edx
add $(1<<L2_PAGETABLE_SHIFT),%eax
loop 1b
/* Initialise L3 identity-map page directory entries. */
/* Four entries pointing at four consecutive pages starting at l2_identmap.
   The +7 sets the three low flag bits (PRESENT, RW and USER on x86). */
mov $sym_phys(l3_identmap),%edi
mov $(sym_phys(l2_identmap)+7),%eax
mov $4,%ecx
1: mov %eax,(%edi)
add $8,%edi
add $PAGE_SIZE,%eax
loop 1b
/* Initialise L3 xen-map page directory entry. */
mov $(sym_phys(l2_xenmap)+7),%eax
mov %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
/* Initialise L3 boot-map page directory entry. */
mov $(sym_phys(l2_bootmap)+7),%eax
mov %eax,sym_phys(l3_bootmap) + 0*8
/* Hook identity-map, xen-map, and boot-map L3 tables into PML4. */
mov $(sym_phys(l3_bootmap)+7),%eax
mov %eax,sym_phys(idle_pg_table) + 0*8
mov $(sym_phys(l3_identmap)+7),%eax
mov %eax,sym_phys(idle_pg_table) + l4_table_offset(DIRECTMAP_VIRT_START)*8
mov $(sym_phys(l3_xenmap)+7),%eax
mov %eax,sym_phys(idle_pg_table) + l4_table_offset(XEN_VIRT_START)*8
#else
/* 32-bit build: 2MB pages described by 8-byte entries, i.e. PAE-format tables
(the original annotation points out that 32-bit Xen therefore needs PAE).
The first loop maps physical memory from 0 up to DIRECTMAP_PHYS_END at two
places: linear 0 (low mapping) and linear __PAGE_OFFSET (high mapping). The
second loop continues the low mapping only, up to 16MB. The original
annotation gives DIRECTMAP_PHYS_END as 12MB; that value is not visible here.
Paging has not been enabled yet at this point.
*/
/* Initialize low and high mappings of memory with 2MB pages */
/* __PAGE_OFFSET>>18 is the byte offset of the high-mapping entry: address>>21
   picks the 2MB slot and each slot is 8 bytes. stosl writes the low dword and
   advances EDI by 4; the add $4 skips the upper dword of the entry. */
mov $sym_phys(idle_pg_table_l2),%edi
mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
stosl /* low mapping */
add $4,%edi
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $DIRECTMAP_PHYS_END+0xe3,%eax
jne 1b
1: stosl /* low mappings cover up to 16MB */
add $4,%edi
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $(16<<20)+0xe3,%eax
jne 1b
#endif
/* Initialize 4kB mappings of first 2MB or 4MB of memory. */
/* Fills l1_identmap with L1_PAGETABLE_ENTRIES entries, 8 bytes apart. ECX is
   the index of the next page to be written; EAX is that page's entry value. */
mov $sym_phys(l1_identmap),%edi
mov $0x263,%eax /* PRESENT+RW+A+D+SMALL_PAGES */
#if defined(__x86_64__)
or $0x100,%eax /* GLOBAL */
#endif
xor %ecx,%ecx
1: stosl
add $4,%edi
add $PAGE_SIZE,%eax
inc %ecx
/* VGA hole (0xa0000-0xc0000) should be mapped UC. */
/* PCD is switched on when the next page is 0xa0 and off again at 0xc0, so the
   entries for pages 0xa0..0xbf carry it. */
cmp $0xa0,%ecx
jne 2f
or $0x10,%eax /* +PCD */
2: cmp $0xc0,%ecx
jne 2f
and $~0x10,%eax /* -PCD */
2: cmp $L1_PAGETABLE_ENTRIES,%ecx
jne 1b
/* Turn EDI into an L2 entry that points at l1_identmap with flags 0x63
   (PRESENT+RW+A+D). This assumes the loop above advanced EDI by exactly
   PAGE_SIZE - TODO confirm against L1_PAGETABLE_ENTRIES. The entry then
   replaces the first slot of the L2 table(s), so the lowest 2MB superpage is
   mapped through the 4kB table instead. */
sub $(PAGE_SIZE-0x63),%edi
#if defined(__x86_64__)
mov %edi,sym_phys(l2_identmap)
mov %edi,sym_phys(l2_xenmap)
mov %edi,sym_phys(l2_bootmap)
#else
mov %edi,sym_phys(idle_pg_table_l2)
mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
#endif
/* Apply relocations to bootstrap trampoline. */
/* The table between __trampoline_rel_start and __trampoline_rel_stop holds
   32-bit offsets, each relative to the address of its own table slot. The
   dword found at slot address + offset gets trampoline_phys (EDX) added. */
mov sym_phys(trampoline_phys),%edx
mov $sym_phys(__trampoline_rel_start),%edi
/* NOTE(review): this stores back the value loaded two instructions earlier;
   as seen here it leaves trampoline_phys unchanged. */
mov %edx,sym_phys(trampoline_phys)
1:
mov (%edi),%eax
add %edx,(%edi,%eax)
add $4,%edi
cmp $sym_phys(__trampoline_rel_stop),%edi
jb 1b
/* Patch in the trampoline segment. */
/* Same table format as above, but each target is a 16-bit word that is
   overwritten with the trampoline segment (trampoline_phys >> 4). */
shr $4,%edx
mov $sym_phys(__trampoline_seg_start),%edi
1:
mov (%edi),%eax
mov %dx,(%edi,%eax)
add $4,%edi
cmp $sym_phys(__trampoline_seg_stop),%edi
jb 1b
/* cmdline_parse_early is not defined in this view; presumably it comes from
   the cmdline.S include that follows this routine. */
call cmdline_parse_early
/* Switch to low-memory stack. */
/* ESP = trampoline_phys + 0x10000. The far-return frame is built here: the
   BOOT_CS32 selector first, then the address of trampoline_boot_cpu_entry
   inside the relocated copy of the trampoline. */
mov sym_phys(trampoline_phys),%edi
lea 0x10000(%edi),%esp
lea trampoline_boot_cpu_entry-trampoline_start(%edi),%eax
pushl $BOOT_CS32
push %eax
/* Copy bootstrap trampoline to low memory, below 1MB. */
/* EDI still holds trampoline_phys (the destination); ESI = source,
   ECX = length in bytes. */
mov $sym_phys(trampoline_start),%esi
mov $trampoline_end - trampoline_start,%ecx
rep movsb
/* Jump into the relocated trampoline. */
/* The far return pops the offset and then the code selector pushed above, so
   execution continues at trampoline_boot_cpu_entry in the low-memory copy of
   the trampoline (source in trampoline.S) with CS = BOOT_CS32. */
lret
#include "cmdline.S"
/* reloc: called from __start with the Multiboot info pointer pushed on the
   stack; its EAX result is stored in multiboot_ptr. The body is supplied by
   reloc.S, which is not shown here. */
reloc:
#include "reloc.S"
.align 16
.globl trampoline_start, trampoline_end
/* Second stage of initialisation (real mode, per the original annotation).
   These two labels bracket the code from trampoline.S; __start copies the
   bytes between them to trampoline_phys in low memory and then jumps into
   that copy. */
trampoline_start:
#include "trampoline.S"
trampoline_end:
.text
/* Third stage of initialisation (per the original annotation). The code is
   supplied by x86_64.S or x86_32.S depending on the build; neither file is
   shown here. */
__high_start:
#ifdef __x86_64__
#include "x86_64.S"
#else
#include "x86_32.S"
#endif