转自:http://blog.csdn.net/tommy_wxie/article/details/17093307
上一篇微博留下了这几个函数,现在我们来分析它们
sanity_check_meminfo();
arm_memblock_init(&meminfo, mdesc);
paging_init(mdesc);
request_standard_resources(mdesc);
在上一微博有展现根据启动参数初始化meminfo,记录了物理内存的开始和大小
sanity_check_meminfo();
有mmu的情况下这个函数才有意义,初始化高端内存,首先内核要选上这个
KernelFeatures下的
[*]High Memory Support
arch/arm/include/asm/setup.h
#ifdef CONFIG_ARCH_EP93XX
# define NR_BANKS 16
#else
# define NR_BANKS 8 三星当然是8
#endif
struct membank {
phys_addr_t start;
unsigned long size;
unsigned int highmem;
};如果是高端内存highmem为1
struct meminfo {
int nr_banks;
struct membank bank[NR_BANKS];
};
extern struct meminfo meminfo;
我们现在的函数就是初始化meminfo这个全局变量
高端内存
Linux内核的地址空间是3G~4G。假如说机器的内存为512M,那么内存的物
理地址范围是:0~512M,而映射到内核空间的范围是3G~3G+512M(可以叫low memory)。
而其余的空间都是高端内存的范围,即:3G+512M~4G,但是为了避免越界等安全问题
的考虑,高端内存又离开了低端内存8M空间,即从3G+512M+8M空间开始。linux内核又规定,高端内存映射区至少为128M,即假如物理内存为1G,那么线性映射的低端内存最多为896M,高端内存就是从3G+896M~4G,即低端内存的最大虚拟地址为:0xC0000000+896M,实际为:0xC0000000+x(内存size)
简单举个例子,假设你有2G内存,而内核地址空间只有1G,不能把全部内存都做线性映射,内核就会把前896M用于RAM线性映射,后128M的地址空间可以通过更改映射关系访问剩下的内存。有三种方法:永久内核映射,临时映射,非连续内存分配(这些以后写关于内存管理的文章时再分析)。
没有全部贴
void __init sanity_check_meminfo(void)
{
int i, j, highmem= 0;
//wxl add
printk(KERN_NOTICE"vmalloc_min = %lx\n", vmalloc_min);
打印结果
vmalloc_min = ee000000
vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
arch/arm/mach-s3c2410/include/mach/vmalloc.h
#define VMALLOC_END 0xF6000000UL
0xF6000000-0x8000000=0xEE000000
3808M
for (i = 0, j = 0;i < meminfo.nr_banks; i++) {
structmembank *bank = &meminfo.bank[j];
*bank =meminfo.bank[i];
#ifdef CONFIG_HIGHMEM
__va()是把物理地址转换成虚拟地址
#define __virt_to_phys(x) ((x) - PAGE_OFFSET + PHYS_OFFSET)
#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET)
#define __va(x) ((void *)__phys_to_virt((unsigned long)(x)))
下面的条件告诉了我们高端地址范围,大于等于vmalloc_min的好理解,小于PAGE_OFFSET是永久内核映射
if(__va(bank->start) >= vmalloc_min ||
__va(bank->start) < (void *) PAGE_OFFSET)
highmem = 1;
//wxl add
printk(KERN_NOTICE "start:bank->start = %lx bank->size = %lx __va = %lx highmem = %d\n",(unsigned long)bank->start, (unsigned long)bank->size, (unsignedlong)__va(bank->start), highmem);
打印结果
start: bank->start = 30000000 bank->size = 4000000 __va =c0000000 highmem = 0
bank->start bank->size就是上一篇微博提到的
bank->highmem = highmem;
/*
* Splitthose memory banks which are partially overlapping
* thevmalloc area greatly simplifying things later.
*/
假设__va(bank->start) < vmalloc_min;它的大小可能会超过低端内存,也就是起始地址在低端,结束地址超过低端范围,那么就要把它分开,你可以简单看看代码
if(__va(bank->start) < vmalloc_min &&
bank->size > vmalloc_min - __va(bank->start)) {
if(meminfo.nr_banks >= NR_BANKS) {
printk(KERN_CRIT "NR_BANKS too low, "
"ignoringhigh memory\n");
}else {
memmove(bank + 1, bank,
(meminfo.nr_banks - i) * sizeof(*bank));
meminfo.nr_banks++;
i++;
bank[1].size -= vmalloc_min - __va(bank->start);
bank[1].start = __pa(vmalloc_min - 1) + 1;
bank[1].highmem = highmem = 1;
j++;
}
bank->size = vmalloc_min - __va(bank->start);
}
//wxl add
printk(KERN_NOTICE "end: bank->start = %lx bank->size =%lx\n", (unsigned long)bank->start, (unsigned long)bank->size);
打印结果
end: bank->start = 30000000 bank->size = 4000000
#else
……
#endif
重设低端内存限制
if(!bank->highmem && bank->start + bank->size > lowmem_limit)
lowmem_limit =bank->start + bank->size;
j++;
}
……
arm_memblock_init(&meminfo, mdesc);
在此处按地址数据从小到大排序meminfo中的数据,并初始化全局的memblock数据。
void __init arm_memblock_init(struct meminfo *mi, structmachine_desc *mdesc)
{
int i;
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),meminfo_cmp, NULL);排序不细看了,而且我的就一个bank
memblock_init();
这个就是对memblock变量初始化,该赋初值的赋初值,该清零的清零。说一点
memblock里有个memory是struct memblock_type
struct memblock_region {
phys_addr_t base;
phys_addr_t size;
};
struct memblock_type {
unsigned long cnt; /* number of regions */
unsigned long max; /* size of the allocated array*/
struct memblock_region *regions;
};
初始化
static struct memblock_regionmemblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
static struct memblock_regionmemblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
memblock.memory.max = INIT_MEMBLOCK_REGIONS; 值为128
memblock.memory.cnt = 1;
memblock.memory.regions[0].base = 0;
memblock.memory.regions[0].size = 0;
memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base =MEMBLOCK_INACTIVE;
MEMBLOCK_INACTIVE为0x44c9e71bUL
memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)这样看你是多少位系统了,不过我们只要知道是anywhere
memblock还有个reserved,和memory初始化的值一样。
for (i = 0; i <mi->nr_banks; i++)
memblock_add(mi->bank[i].start, mi->bank[i].size);
long __init_memblock memblock_add(phys_addr_t base, phys_addr_tsize)
{
return memblock_add_region(&memblock.memory,base, size);
我的base 0x30000000 size = 0x4000000;
}
static long __init_memblock memblock_add_region(structmemblock_type *type,
phys_addr_t base, phys_addr_t size)
{
phys_addr_t end =base + size;某bank的结束地址
int i, slot = -1;
/* First try andcoalesce this MEMBLOCK with others */
for (i = 0; i <type->cnt; i++) {
structmemblock_region *rgn = &type->regions[i];
phys_addr_t rend = rgn->base +rgn->size;
/* Exit ifthere's no possible hits */
if(rgn->base > end || rgn->size == 0)当前region的起始地址已超过新区域的结束地址(后面不可能再有重叠),或者遇到size为0的空region,就结束查找
break;
/* Checkif we are fully enclosed within an existing
* block
*/
if(rgn->base <= base && rend >= end)检查新区域是否完全落在已有block的范围内,是的话无需再添加
return 0;
/* Checkif we overlap or are adjacent with the bottom
* of a block.
*/
if (base< rgn->base && end >= rgn->base) {
/*If we can't coalesce, create a new block */
if(!memblock_memory_can_coalesce(…这个函数一定返回1所以省去,下同
/*We extend the bottom of the block down to our
*base
*/
rgn->base = base;
rgn->size = rend - base;
/* Return if we have nothingelse to allocate
*(fully coalesced)
*/
if(rend >= end)
return 0;
/*We continue processing from the end of the
*coalesced block.
*/
base = rend;
size = end - base;
上面这一段就是处理与已有block底部重叠(或相邻)的情况:把已有block向下扩展到base,如果还有剩余部分,就从rend开始继续处理,
}
/* Now check if we overlap or areadjacent with the
* top ofa block
*/
去除顶部的重叠区
if (base<= rend && end >= rend) {
/*If we can't coalesce, create a new block */
if(!memblock_memory_can_coalesce(…
size += (base - rgn->base);
base = rgn->base;
memblock_remove_region(type, i--);
for (i = r; i < type->cnt - 1; i++) {
type->regions[i].base = type->regions[i + 1].base;
type->regions[i].size = type->regions[i + 1].size;
}
type->cnt--;
有重叠说明连续的,就把它合并到一起,
}
}
/* If the array isempty, special case, replace the fake
* filler regionand return
*/
if ((type->cnt== 1) && (type->regions[0].size == 0)) {
我的平台现在调用会执行到这
type->regions[0].base = base; 0x30000000
type->regions[0].size = size; 0x4000000
return 0;
}
new_block:新的block
/* If we are outof space, we fail. It's too late to resize the array
* but then thisshouldn't have happened in the first place.
*/
if(WARN_ON(type->cnt >= type->max))超过最大就返回
return -1;
/* Couldn'tcoalesce the MEMBLOCK, so add it to the sorted table. */
不能合并我们按顺序存到regions中
for (i =type->cnt - 1; i >= 0; i--) {
if (base< type->regions[i].base) {
type->regions[i+1].base = type->regions[i].base;
type->regions[i+1].size = type->regions[i].size;
} else {
type->regions[i+1].base = base;
type->regions[i+1].size = size;
slot = i + 1;
break;
}
}
if (base <type->regions[0].base) {
type->regions[0].base = base;
type->regions[0].size = size;
slot = 0;
}
type->cnt++;
/* The array isfull ? Try to resize it. If that fails, we undo
* our allocationand return an error
*/
满了尝试重定义大小
if (type->cnt== type->max && memblock_double_array(type)) {
BUG_ON(slot < 0);
memblock_remove_region(type, slot);
return -1;
}
return 0;
}
看了源码其实就是把之前的bank信息存到memblock.memory.regions中。
/* Register thekernel text, kernel data and initrd with memblock. */
Kernel XIP 原理如下,内核映像在Flash 设备上执行以后,只把映像中要读写的.data和.bss 拷贝到SDRAM 主存中,同时设置好系统的MMU,内核运行过程中,代码段.text 指向Flash 空间,.data 和.bss 指向SDRAM 主存空间。相对于全映射的执行方式,系统节省了解压缩和拷贝代码段的时间,节省了代码段占用的RAM 主存空间。
我的没有用这个东西。不过从下面你可以看到XIP没有把text存入memblock
#ifdef CONFIG_XIP_KERNEL
memblock_reserve(__pa(_sdata), _end - _sdata);
#else
memblock_reserve(__pa(_stext),_end - _stext);
在System.map下
c00081e0 T _stext
c0318000 D _sdata
c0367db8 A _end
__pa(_stext) = 0x300081e0
#endif
long __init_memblock memblock_reserve(phys_addr_t base,phys_addr_t size)
{
structmemblock_type *_rgn = &memblock.reserved;
BUG_ON(0 == size);
returnmemblock_add_region(_rgn, base, size);这个看上面
}
通过上面的我们可以算出,不过还是加个打印吧
//wxl add
printk(KERN_NOTICE "memory:\n");
for (i = 0; i <memblock.memory.cnt; i++)
{
printk(KERN_NOTICE"regions[%d] base = %lx size = %lx\n", i, (unsignedlong)memblock.memory.regions[i].base, (unsigned long)memblock.memory.regions[i].size);
}
printk(KERN_NOTICE"reserved:\n");
for (i = 0; i <memblock.reserved.cnt; i++)
{
printk(KERN_NOTICE"regions[%d] base = %lx size = %lx\n", i, (unsignedlong)memblock.reserved.regions[i].base, (unsigned long)memblock.reserved.regions[i].size);
}
打印结果
memory:
regions[0] base = 30000000 size = 4000000
reserved:
regions[0] base = 300081e0 size = 35fbd8
用上面计算也是这个结果,到此reserved应该就是记录内核的大小,不过下面它还要做些事
下面和initrd的使用有关
为了能够使用RAM disk你的内核必须要支持RAMdisk,即:在编译内核时,要选中RAMdisk support这一选项,会在配置文件中定义CONFIG_BLK_DEV_RAM。
为了让内核有能力在内核加载阶段就能装入RAMDISK,并运行其中的内容,要选中initial RAM disk(initrd) support 选项,会在配置文件中定义CONFIG_BLK_DEV_INITRD。
http://wenku.baidu.com/view/dc6dc785bceb19e8b8f6baba.html
此链接是一篇关于initramfs和initrd的文章,有兴趣看看
Initrd是一个临时的文件系统。在某些没有存储设备的嵌入式系统中,initrd是永久的根文件系统。
它就是个文件系统,不过很小
initrd 中包含了实现这个目标所需要的目录和可执行程序的最小集合,例如将内核模块加载到内核中所使用的insmod 工具。
initrd 映像中包含了支持 Linux系统两阶段引导过程所需要的必要可执行程序和系统文件。
咱们看看它有什么
http://blog.163.com/dongfeng_114/blog/static/4664357420112452442211/
查看initrd的内容方法
我的是用cpio方法的,下面是我的pc中initrd的内容
[root@localhost tempfs]# ls
bin dev etc init initrd.img lib proc sbin sys sysroot
[root@localhost tempfs]# ls dev/
console ptmx ram1 tty tty10 tty2 tty5 tty8 ttyS1 zero
mapper ram rtc tty0 tty11 tty3 tty6 tty9 ttyS2
null ram0 systty tty1 tty12 tty4 tty7 ttyS0 ttyS3
[root@localhost tempfs]# ls sbin/
dmraid insmod kpartx lvm modprobe nash
最后说一下uboot的bootargs 要设置initrd=addr,[Size]M
大家看看自己思考思考吧,我们看下面的内存处理
#ifdef CONFIG_BLK_DEV_INITRD
先说phys_initrd_size,它初始化定义是0
static int __init parse_tag_initrd(const struct tag *tag)
{
printk(KERN_WARNING "ATAG_INITRD is deprecated; "
"please update your bootloader.\n");
phys_initrd_start= __virt_to_phys(tag->u.initrd.start);
phys_initrd_size =tag->u.initrd.size;
return 0;
}
__tagtable(ATAG_INITRD, parse_tag_initrd);
上面的东西看过我上一篇《linux内核启动1》应该不会陌生吧,就是把bootloader通过ATAG_INITRD标签(或命令行initrd=参数)传入的initrd起始地址和大小赋值到phys_initrd_start,phys_initrd_size;
if(phys_initrd_size &&
!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {
memblock_is_region_memory函数就是和memblock.memory比较看在不在此内存里面
pr_err("INITRD:0x%08lx+0x%08lx is not a memory region - disabling initrd\n",
phys_initrd_start, phys_initrd_size);
如果你看到这个打印,就是initrd不在可用内存范围内
phys_initrd_start = phys_initrd_size = 0;
}
if(phys_initrd_size &&
memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
memblock_is_region_reserved当然是和memblock.reserved比较
pr_err("INITRD:0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n",
phys_initrd_start, phys_initrd_size);
如果你看到这个打印,就是initrd和内核重叠了
phys_initrd_start = phys_initrd_size = 0;
}
上面的两个判断就是在可用内存内且不能和内核重叠
if(phys_initrd_size) {
memblock_reserve(phys_initrd_start, phys_initrd_size);上面已解释过
/* Nowconvert initrd to virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);转换为虚拟地址
initrd_end= initrd_start + phys_initrd_size;
}
#endif
reserved又记录initrd信息
arm_mm_memblock_reserve();
void __init arm_mm_memblock_reserve(void)
{
......
memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
……
存储页表空间
arm页表地址
c0004000 A swapper_pg_dir
arch/arm/kernel/head.S下
.equ swapper_pg_dir,KERNEL_RAM_VADDR - PG_DIR_SIZE
#define PG_DIR_SIZE 0x4000
KERNEL_RAM_VADDR是虚拟地址0xc0008000(对应的物理地址是0x30008000),所以swapper_pg_dir = 0xc0008000 - 0x4000 = 0xc0004000
arm_dt_memblock_reserve();这个保存设备树的启动参数,不细看
/* reserve anyplatform specific memblock areas */
if(mdesc->reserve)对应平台自定义的block区
mdesc->reserve();
memblock_analyze();更新memblock中memory_size的值
memblock_dump_all();这个就是打印reserved和memory,和我中间加的打印东西差不多