在开始之前,先看一下SSD202的内存使用范围
硬件上SSD202内置128MB内存,其中有一部分预留给MMA,MMAP以及CMA
具体的大小设置在bootargs 中
bootargs = "wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1
cma=8M
LX_MEM=0x7f00000 mma_heap=mma_heap_name0,miu=0,sz=0x1000000
mma_memblock_remove=1 highres=off mmap_reserved=fb,miu=0,sz=0x300000
max_start_off=0x7C00000,max_end_off=0x7F00000";
即最大可使用内存0x7f00000=127MB,CMA占用8M,MMA使用16M,MMAP使用3M,剩下就是kernel可使用范围
预留内存相关启动日志如下:
[ 0.000000] LXmem is 0x7f00000 PHYS_OFFSET is 0x20000000
[ 0.000000] Add mem start 0x20000000 size 0x7f00000!!!!
[ 0.000000]
[ 0.000000] LX_MEM = 0x20000000, 0x7f00000 (16*7=112+15=127MB)
[ 0.000000] LX_MEM2 = 0x0, 0x0
[ 0.000000] LX_MEM3 = 0x0, 0x0
[ 0.000000] EMAC_LEN= 0x0
[ 0.000000] DRAM_LEN= 0x0
----mmap_reserved=fb,miu=0,sz=0x300000=3M
[ 0.000000] deal_with_reserved_mmap memblock_reserve success mmap_reserved_config[0].reserved_start=
[ 0.000000] 0x27c00000 == 0x27f00000 - 3M(0x00300000)
[ 0.000000]
---mma_heap=mma_heap_name0,miu=0,sz=0x1000000=16M
[ 0.000000] deal_with_reserve_mma_heap memblock_reserve success mma_config[0].reserved_start=
[ 0.000000] 0x26c00000 == 0x27c00000 - 16M(0x1000000)
---cma size = 8M
[ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M
[ 0.000000] Memory policy: Data cache writealloc
[ 0.000000] percpu: Embedded 13 pages/cpu @c62bc000 s21208 r8192 d23848 u53248
[ 0.000000] Built 1 zonelists in Zone order, mobility grouping on. Total pages: 28162
[ 0.000000] Kernel command line: wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1 cma=8M LX_MEM=0x7f00000 mma_heap=mma_heap_na 。。。。。)
[ 0.000000] PID hash table entries: 512 (order: -1, 2048 bytes)
[ 0.000000] Dentry cache hash table entries: 16384 (order: 4, 65536 bytes)
[ 0.000000] Inode-cache hash table entries: 8192 (order: 3, 32768 bytes)
[ 0.000000] Memory: 96736K/113664Kavailable (2467K kernel code, 222K rwdata, 1212K rodata, 164K init, 174K bss, 8736K reserved, 8192K cma-reserved)
从内存最顶端往下预留,最开始为3MB的MMAP,然后是16M的MMA,最后是8M的CMA区域,留给kernel的可用区域在0x26400000以下
接着看一下SSD202 kernel的虚拟映射表:
[ 0.000000] Virtual kernel memory layout:
[ 0.000000] vector : 0xffff0000 - 0xffff1000 ( 4 kB)
[ 0.000000] fixmap : 0xffc00000 - 0xfff00000 (3072 kB)
[ 0.000000] vmalloc : 0xc8000000 - 0xff800000 ( 888 MB)
[ 0.000000] lowmem : 0xc0000000 - 0xc7f00000 ( 127 MB)
[ 0.000000] modules : 0xbf800000 - 0xc0000000 ( 8 MB)
[ 0.000000] .text : 0xc0008000 - 0xc02710a8 (2469 kB)
[ 0.000000] .init : 0xc03c3000 - 0xc03ec000 ( 164 kB)
[ 0.000000] .data : 0xc03ec000 - 0xc0423bd8 ( 223 kB)
[ 0.000000] .bss : 0xc0425000 - 0xc04509e0 ( 175 kB)
vector为中断向量映射区,位于内存最高端区域
fixmap为固定映射区,即虚拟地址固定,主要的kernel初始化阶段使用,比如console,dtb等以及热补丁应用
vmalloc,虚拟内存申请的地址范围,用于给vmalloc/ioremap动态分配内存
lowmem是线性映射区,1:1映射到物理地址
vmalloc区域和lowmem区域之间有一个1MB的hole,可以防止vmalloc越界
.text、.init、.data、.bss都属于lowmem区域,也即ZONE_NORMAL;
vector、fixmap、vmalloc位于lowmem线性映射区之上的内核虚拟地址空间。注意这里描述的是虚拟地址布局,并非物理内存zone;本系统仅127MB物理内存,全部落在lowmem范围内,并不存在真正的ZONE_HIGHMEM物理区域。
modules区域(0xbf800000 - 0xc0000000)位于PAGE_OFFSET之下,地址上属于用户空间范围,但被专门预留给内核模块加载使用
以上预留的MMA,MMAP,CMA等空间都在lowmem区
关于CMA
Contiguous Memory Allocator, CMA
,连续内存分配器,用于分配连续的大块内存
CMA分配器
,会Reserve一片物理内存区域:
- 设备驱动不用时,内存管理系统将该区域用于分配和管理可移动类型页面;
- 设备驱动使用时,用于连续内存分配,此时已经分配的页面需要进行迁移;
- CMA并不进行内存管理,CMA area的内存最终还是要并入伙伴系统进行管理
- cma_alloc用来从指定的CMA area上分配count个连续的page frame,按照align对齐
此外,CMA分配器
还可以与DMA子系统
集成在一起,使用DMA的设备驱动程序无需使用单独的CMA API
在SSD202中,cma相关的日志如下:
------USB HOST Controller 使用
[ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M
[ 1.371962] Sstar-ehci-2 soc:Sstar-ehci-2: EHCI Host Controller
[ 1.377889] Sstar-ehci-2 soc:Sstar-ehci-2: new USB bus registered, assigned bus number 1
[ 1.385990] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 1.386037] cma: cma_alloc(): returned c63bd840
[ 1.386052] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 1.386068] cma: cma_alloc(): returned c63bd860
[ 1.386083] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 1.386099] cma: cma_alloc(): returned c63bd880
------DMA 使用
[ 1.748775] MSYS: DMEM request: [BDMA]:0x00000840
[ 1.753324] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 1.753367] cma: cma_alloc(): returned c63bd900
[ 1.753382] MSYS: DMEM request: [BDMA]:0x00000840 success, CPU phy:@0x26448000, virt:@0xC6448000
[ 6.942669] MSYS: DMEM request: [emac0_buff]:0x00000812
[ 6.947755] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 6.947946] cma: cma_alloc(): returned c63bd920
------ETH PHY 使用
[ 6.947962] MSYS: DMEM request: [emac0_buff]:0x00000812 success, CPU phy:@0x26449000, virt:@0xC6449000
[ 7.902325] >> [sdmmc] ms_sdmmc_probe
[ 7.906510] cma: cma_alloc(cma c0435ef0, count 1, align 0)
[ 7.906630] cma: cma_alloc(): returned c63bd940
关于测试cma代码,借用宋老师的测试用例
/*
* kernel module helper for testing CMA
*
* Licensed under GPLv2 or later.
*/
#define DEBUG
#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/dma-mapping.h>
#define CMA_NUM 10
static struct device *cma_dev;
static dma_addr_t dma_phys[CMA_NUM];
static void *dma_virt[CMA_NUM];
/* any read request will free coherent memory, eg.
* cat /dev/cma_test
*/
static ssize_t
cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
int i;
for (i = 0; i < CMA_NUM; i++)
{
if (dma_virt[i])
{
dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);
_dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
dma_virt[i] = NULL;
break;
}
}
return 0;
}
/*
* any write request will alloc coherent memory, eg.
* echo 0 > /dev/cma_test
*/
static ssize_t
cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
int i;
int ret;
unsigned int ctc = 0;
for (i = 0; i < CMA_NUM; i++)
{
if (!dma_virt[i])
{
dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);
if (dma_virt[i])
{
void *p;
/* touch every page in the allocated memory */
for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
*(u32 *)p = ctc++;
_dev_info(cma_dev, "[%d] alloc virt: %p phys: %p\n", i, dma_virt[i], (void *)dma_phys[i]);
}
else
{
dev_err(cma_dev, "[%d] no mem in CMA area\n", i);
ret = -ENOMEM;
}
break;
}
}
return count;
}
static const struct file_operations cma_test_fops = {
.owner = THIS_MODULE,
.read = cma_test_read,
.write = cma_test_write,
};
static struct miscdevice cma_test_misc = {
.name = "cma_test",
.fops = &cma_test_fops,
};
static int __init cma_test_init(void)
{
int i = 0;
int ret = 0;
ret = misc_register(&cma_test_misc);
if (unlikely(ret))
{
pr_err("failed to register cma test misc device!\n");
return ret;
}
cma_dev = cma_test_misc.this_device;
cma_dev->coherent_dma_mask = ~0;
for (i = 0; i < CMA_NUM; i++)
dma_virt[i] = 0;
_dev_info(cma_dev, "registered.\n");
return ret;
}
module_init(cma_test_init);
static void __exit cma_test_exit(void)
{
misc_deregister(&cma_test_misc);
}
module_exit(cma_test_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barry Song <[email protected]>");
MODULE_DESCRIPTION("kernel module to help the test of CMA");
MODULE_ALIAS("CMA test");
insmod cma-test.ko 加载模块
root@wireless-tag:/# insmod cma-test
[ 3971.556944] devtmpfs: create node [cma_test] dev-name [(null)]
[ 3971.562783] misc cma_test: registered.
echo 1 > /dev/cma_test 开始分配空间,每运行一次分配N(MB)(注意设备节点名为cma_test,下划线,与注册的misc设备名一致)
[ 3992.269833] cma: cma_alloc(cma c0435ef0, count 256, align 4)
[ 3992.269986] cma: cma_alloc(): returned c63be800
[ 3992.270497] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000
[ 3993.008489] random: fast init done
cat /dev/cma_test 释放空间,每运行一次释放前一次分配的空间
[ 4022.896707] cma: cma_release(page c63be800)
[ 4022.896887] misc cma_test: free virt: c64c0000 phys: 264c0000
根据内存分配关系,cma物理区域为0x26400000到0x26c00000,日志显示物理地址从0x264c0000开始增长,刚好在cma区
因为一共8M空间,超过3次后,空间将不够
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4187.761083] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000 --1M
root@wireless-tag:/#
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4188.574042] misc cma_test: [1] alloc virt: c65c0000 phys: 265c0000 --2M
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4189.444204] misc cma_test: [2] alloc virt: c67c0000 phys: 267c0000 --3M
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4190.766380] misc cma_test: [3] no mem in CMA area
最后介绍一下fixmap映射,关于详细fixmap可以参考 Fix-Mapped Addresses
在此以dtb加载为例进行介绍
由于使用openwrt系统,有以下几个点比较特别:
1. kernel,dtb,rootfs是打包在一起的,形成一个固件
2. dtb在打包时带有特殊标记,以便于在启动过程中自动查找dtb在固件(内存)中的位置
最后在dtb加载过程在arch/arm/kernel/devtree.c setup_machine_fdt
/**
* setup_machine_fdt - Machine setup when an dtb was passed to the kernel
* @dt_phys: physical address of dt blob
*
* If a dtb was passed to the kernel in r2, then use it to choose the
* correct machine_desc and to setup the system.
*/
const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
{
const struct machine_desc *mdesc, *mdesc_best = NULL;
void *virt_p = NULL;
#if defined(CONFIG_ARCH_MULTIPLATFORM) || defined(CONFIG_ARM_SINGLE_ARMV7M)
DT_MACHINE_START(GENERIC_DT, "Generic DT based system")
.l2c_aux_val = 0x0,
.l2c_aux_mask = ~0x0,
MACHINE_END
mdesc_best = &__mach_desc_GENERIC_DT;
#endif
virt_p = phys_to_virt(dt_phys);
early_print("to check atags dtb phys %p, virt %p\n", (void*)dt_phys, virt_p);
if (!dt_phys || !early_init_dt_verify(virt_p))
{
#ifdef CONFIG_SS_BUILTIN_DTB
if(early_init_dt_verify(builtin_dtb_start))
{
extern int early_atags_to_fdt(void *atag_list, void *fdt, int total_space);
extern u32 builtin_dtb_size;
//early_print("early_init_dt_verify() pass...\n");
if((!dt_phys ) || (!early_atags_to_fdt(virt_p, builtin_dtb_start, builtin_dtb_size)))
{
early_print("early_atags_to_fdt() success\n");
}
}
else
#endif
{
return NULL;
}
}
mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);
if (!mdesc) {
const char *prop;
int size;
unsigned long dt_root;
early_print("\nError: unrecognized/unsupported "
"device tree compatible list:\n[ ");
dt_root = of_get_flat_dt_root();
prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
while (size > 0) {
early_print("'%s' ", prop);
size -= strlen(prop) + 1;
prop += strlen(prop) + 1;
}
early_print("]\n\n");
dump_machine_table(); /* does not return */
}
/* We really don't want to do this, but sometimes firmware provides buggy data */
if (mdesc->dt_fixup)
mdesc->dt_fixup();
early_init_dt_scan_nodes();
/* Change machine number to match the mdesc we're using */
__machine_arch_type = mdesc->nr;
return mdesc;
}
fixmap初始化在arch/arm/mm/mmu.c中,执行过程为 setup_arch --> early_fixmap_init
void __init early_fixmap_init(void)
{
pmd_t *pmd;
/*
* The early fixmap range spans multiple pmds, for which
* we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
!= FIXADDR_TOP >> PMD_SHIFT);
pmd = fixmap_pmd(FIXADDR_TOP);
pmd_populate_kernel(&init_mm, pmd, bm_pte);
pte_offset_fixmap = pte_offset_early_fixmap;
}
kernel加载初始化阶段的页表建立位于linux/arch/arm/kernel/head.S中(在LPAE配置下为3级页表:PGD–>PMD–>PTE;非LPAE时实际只用到两级)
/*
* Setup the initial page tables. We only setup the barest
* amount which are required to get the kernel running, which
* generally means mapping in the kernel code.
*
* r8 = phys_offset, r9 = cpuid, r10 = procinfo
*
* Returns:
* r0, r3, r5-r7 corrupted
* r4 = physical page table address
*/
__create_page_tables:
pgtbl r4, r8 @ page table address
/*
* Clear the swapper page table
*/
mov r0, r4
mov r3, #0
add r6, r0, #PG_DIR_SIZE
1: str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
teq r0, r6
bne 1b
#ifdef CONFIG_ARM_LPAE
/*
* Build the PGD table (first level) to point to the PMD table. A PGD
* entry is 64-bit wide.
*/
mov r0, r4
add r3, r4, #0x1000 @ first PMD table address
orr r3, r3, #3 @ PGD block type
mov r6, #4 @ PTRS_PER_PGD
mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER
1:
#ifdef CONFIG_CPU_ENDIAN_BE8
str r7, [r0], #4 @ set top PGD entry bits
str r3, [r0], #4 @ set bottom PGD entry bits
#else
str r3, [r0], #4 @ set bottom PGD entry bits
str r7, [r0], #4 @ set top PGD entry bits
#endif
add r3, r3, #0x1000 @ next PMD table
subs r6, r6, #1
bne 1b
add r4, r4, #0x1000 @ point to the PMD tables
#ifdef CONFIG_CPU_ENDIAN_BE8
add r4, r4, #4 @ we only write the bottom word
#endif
#endif
ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
/*
* Create identity mapping to cater for __enable_mmu.
* This identity mapping will be removed by paging_init().
*/
adr r0, __turn_mmu_on_loc
ldmia r0, {r3, r5, r6}
sub r0, r0, r3 @ virt->phys offset
add r5, r5, r0 @ phys __turn_mmu_on
add r6, r6, r0 @ phys __turn_mmu_on_end
mov r5, r5, lsr #SECTION_SHIFT
mov r6, r6, lsr #SECTION_SHIFT
1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base
str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping
cmp r5, r6
addlo r5, r5, #1 @ next section
blo 1b
/*
* Map our RAM from the start to the end of the kernel .bss section.
*/
add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
ldr r6, =(_end - 1)
orr r3, r8, r7
add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1: str r3, [r0], #1 << PMD_ORDER
add r3, r3, #1 << SECTION_SHIFT
cmp r0, r6
bls 1b
#ifdef CONFIG_XIP_KERNEL
/*
* Map the kernel image separately as it is not located in RAM.
*/
#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
mov r3, pc
mov r3, r3, lsr #SECTION_SHIFT
orr r3, r7, r3, lsl #SECTION_SHIFT
add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
ldr r6, =(_edata_loc - 1)
add r0, r0, #1 << PMD_ORDER
add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1: cmp r0, r6
add r3, r3, #1 << SECTION_SHIFT
strls r3, [r0], #1 << PMD_ORDER
bls 1b
#endif
/*
* Then map boot params address in r2 if specified.
* We map 2 sections in case the ATAGs/DTB crosses a section boundary.
*/
mov r0, r2, lsr #SECTION_SHIFT
movs r0, r0, lsl #SECTION_SHIFT
subne r3, r0, r8
addne r3, r3, #PAGE_OFFSET
addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
orrne r6, r7, r0
strne r6, [r3], #1 << PMD_ORDER
addne r6, r6, #1 << SECTION_SHIFT
strne r6, [r3]
#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
sub r4, r4, #4 @ Fixup page table pointer
@ for 64-bit descriptors
#endif
#ifdef CONFIG_DEBUG_LL
#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*/
addruart r7, r3, r0
mov r3, r3, lsr #SECTION_SHIFT
mov r3, r3, lsl #PMD_ORDER
add r0, r4, r3
mov r3, r7, lsr #SECTION_SHIFT
ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
orr r3, r7, r3, lsl #SECTION_SHIFT
#ifdef CONFIG_ARM_LPAE
mov r7, #1 << (54 - 32) @ XN
#ifdef CONFIG_CPU_ENDIAN_BE8
str r7, [r0], #4
str r3, [r0], #4
#else
str r3, [r0], #4
str r7, [r0], #4
#endif
#else
orr r3, r3, #PMD_SECT_XN
str r3, [r0], #4
#endif
#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
/* we don't need any serial debugging mappings */
ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
#endif
#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder or CATS, we also need to map
* in the 16550-type serial port for the debug messages
*/
add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
orr r3, r7, #0x7c000000
str r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug. This is
* only for Acorn RiscPC architectures.
*/
add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
orr r3, r7, #0x02000000
str r3, [r0]
add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
str r3, [r0]
#endif
#endif
#ifdef CONFIG_ARM_LPAE
sub r4, r4, #0x1000 @ point to the PGD table
#endif
ret lr
ENDPROC(__create_page_tables)
关于swapper_pg_dir
/*
* swapper_pg_dir is the virtual address of the initial page table.
* We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must
* make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect
* the least significant 16 bits to be 0x8000, but we could probably
* relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
*/
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000
#error KERNEL_RAM_VADDR must start at 0xXXXX8000
更多推荐
Linux虚拟内存映射分析以及CMA测试 - 以SSD202为例
发布评论