Linux内核物理页面page结构分析

Linux内核物理页面page结构分析

首页休闲益智pin the buddy更新时间:2024-05-11

1、思考问题?(答案:Linux操作系统原理)

当内存不足时,我们如何进行分配?当操作系统运行时间太长,产生了很多内存碎片,此时我们应该怎么办?如果我们想要分配几十个字节的小块内存,应该使用什么样的方法?我们如何提高系统分配物理内存的效率?

一、物理页面page结构

32位的CPU寻址时按照数据位宽(字,word)进行,但是CPU在处理物理内存时并不是按照字来分配,因为现在的CPU都采用分页机制来管理内存。所以在CPU里面有一个叫MMU的硬件单元,它负责处理虚拟内存到物理内存的映射关系,也就是页表的翻译工作。站在CPU的角度来看,管理物理内存的最小单位是页,Linux内核使用一个struct page数据结构来描述一个物理页面。struct page数据结构的定义可以在内核源码中找到。

掌握Linux内核源码分析技术(优势):Linux内核开发工程师。page数据结构对应Linux内核源码如下:

/*
 * struct page - descriptor for one physical page frame.
 *
 * The kernel keeps one of these per physical page. The layout is organised
 * in "double word" blocks, and most fields live in anonymous unions because
 * the same storage is reused by the page cache, anonymous memory, the
 * slab/slub/slob allocators and compound pages.
 */
struct page {
	/* First double word block */

	/*
	 * Set of page status bits; the individual bit positions are named
	 * by enum pageflags.
	 */
	unsigned long flags;
	union {
		/*
		 * When the page is used for file cache, mapping points to the
		 * address_space associated with that file cache; the
		 * address_space is the set of pages belonging to a memory
		 * object (for example an inode). When the page is anonymous,
		 * mapping points to an anon_vma structure, which is mainly
		 * used for reverse mapping.
		 */
		struct address_space *mapping;
		void *s_mem;			/* slab first object */
	};

	/* Second double word */
	struct {
		union {
			pgoff_t index;		/* Our offset within mapping. */
			void *freelist;		/* sl[aou]b first free object */
			bool pfmemalloc;	/* If set by the page allocator,
						 * ALLOC_NO_WATERMARKS was set
						 * and the low watermark was not
						 * met implying that the system
						 * is under some pressure. The
						 * caller should try ensure
						 * this page is only used to
						 * free other pages.
						 */
		};

		union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
			/* Used for cmpxchg_double in slub */
			unsigned long counters;
#else
			/*
			 * Keep _count separate from slub cmpxchg_double data.
			 * As the rest of the double word is protected by
			 * slab_lock but _count is not.
			 */
			unsigned counters;
#endif

			struct {
				union {
					/*
					 * Count of ptes mapped in
					 * mms, to show when page is
					 * mapped & limit reverse map
					 * searches.
					 *
					 * Used also for tail pages
					 * refcounting instead of
					 * _count. Tail pages cannot
					 * be mapped and keeping the
					 * tail page _count zero at
					 * all times guarantees
					 * get_page_unless_zero() will
					 * never succeed on tail
					 * pages.
					 */
					atomic_t _mapcount;

					struct { /* SLUB */
						unsigned inuse:16;
						unsigned objects:15;
						unsigned frozen:1;
					};
					int units;	/* SLOB */
				};
				atomic_t _count;	/* Usage count, see below. */
			};

			/*
			 * _count and _mapcount are the two most important
			 * reference counts in struct page.
			 *
			 * 1. _count is the number of references the kernel
			 *    holds on the page. A value of 0 means the page is
			 *    free or about to be released; a value greater
			 *    than 0 means the page has been allocated, is in
			 *    use by the kernel and will not be freed for now.
			 *    Helpers commonly used to adjust it: get_page(),
			 *    put_page(), page_cache_get().
			 *
			 * 2. _mapcount is the number of user ptes that map the
			 *    page. In a 32-bit kernel every user process has
			 *    3GB of virtual address space and its own page
			 *    table. _mapcount is mainly used by the reverse
			 *    mapping (RMAP) mechanism. -1 means no pte maps
			 *    the page; 0 means only the parent process maps
			 *    it. A freshly allocated anonymous page has
			 *    _mapcount initialised to 0.
			 */
			unsigned int active;	/* SLAB */
		};
	};

	/* Third double word block */
	union {
		/* Links the page into the LRU lists used by page reclaim. */
		struct list_head lru;
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			short int pages;
			short int pobjects;
#endif
		};

		struct slab *slab_page;	/* slab fields */
		struct rcu_head rcu_head;	/* Used by SLAB
						 * when destroying via RCU
						 */
		/* First tail page of compound page */
		struct {
			compound_page_dtor *compound_dtor;
			unsigned long compound_order;
		};

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
		pgtable_t pmd_huge_pte;	/* protected by page->ptl */
#endif
	};

	/* Remainder is not double word aligned */
	union {
		unsigned long private;		/* Mapping-private opaque data:
						 * usually used for buffer_heads
						 * if PagePrivate set; used for
						 * swp_entry_t if PageSwapCache;
						 * indicates order in the buddy
						 * system if PG_buddy is set.
						 */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
		spinlock_t *ptl;
#else
		spinlock_t ptl;
#endif
#endif
		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
		struct page *first_page;	/* Compound tail pages */
	};

#ifdef CONFIG_MEMCG
	struct mem_cgroup *mem_cgroup;
#endif

	/*
	 * On machines where all RAM is mapped into kernel address space,
	 * we can simply calculate the virtual address. On machines with
	 * highmem some memory is mapped into kernel virtual memory
	 * dynamically, so we need a place to store that address.
	 * Note that this field could be 16 bits on x86 ... ;)
	 *
	 * Architectures with slow multiplication can define
	 * WANT_PAGE_VIRTUAL in asm/page.h
	 */
#if defined(WANT_PAGE_VIRTUAL)
	/*
	 * Pointer to the virtual address that corresponds to this page.
	 * Only present when needed, for dynamically mapped highmem pages.
	 */
	void *virtual;
#endif /* WANT_PAGE_VIRTUAL */

#ifdef CONFIG_KMEMCHECK
	/*
	 * kmemcheck wants to track the status of each byte in a page; this
	 * is a pointer to such a status block. NULL if not tracked.
	 */
	void *shadow;
#endif

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
	int _last_cpupid;
#endif
};	/* terminating ';' was missing, which fused this with the next declaration */

/*
 * enum pageflags - bit numbers used in page->flags.
 *
 * The first part enumerates the real bits in order and ends with
 * __NR_PAGEFLAGS, the count of bits in use. The entries after it are
 * aliases that reuse an existing bit for a subsystem-specific meaning.
 */
enum pageflags {
	PG_locked,		/* Page is locked. Don't touch. */
	PG_error,		/* An I/O error occurred on the page */
	PG_referenced,		/* Used for the second-chance scheme of the LRU algorithm */
	PG_uptodate,		/* Contents are valid; set once a read into the page completes */
	PG_dirty,		/* Contents have been modified (dirty page) */
	PG_lru,			/* Page is on an LRU list */
	PG_active,		/* Page is on the active LRU list */
	PG_slab,		/* Page belongs to a slab created by the slab allocator */
	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use */
	PG_arch_1,		/* Architecture-specific page state bit */
	PG_reserved,		/* Page must not be swapped out */
	PG_private,		/* Set when page->private holds valid data;
				 * if pagecache, that is fs-specific data
				 */
	PG_private_2,		/* If pagecache, may hold fs aux data */
	PG_writeback,		/* Page is under writeback */
#ifdef CONFIG_PAGEFLAGS_EXTENDED
	PG_head,		/* A head page */
	PG_tail,		/* A tail page */
#else
	PG_compound,		/* A compound page */
#endif
	PG_swapcache,		/* Swap page */
	PG_mappedtodisk,	/* Has blocks allocated on-disk */
	PG_reclaim,		/* To be reclaimed asap */
	PG_swapbacked,		/* Page is backed by RAM/swap.
				 * NOTE(review): the article glossed this as
				 * "not reclaimable"; confirm against the
				 * upstream header.
				 */
	PG_unevictable,		/* Page is "unevictable" */
#ifdef CONFIG_MMU
	PG_mlocked,		/* Page is vma mlocked */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
	PG_uncached,		/* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
	PG_hwpoison,		/* hardware poisoned page. Don't touch */
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	PG_compound_lock,
#endif
	__NR_PAGEFLAGS,

	/* ---- aliases: each one reuses a bit defined above ---- */

	/* Filesystems */
	PG_checked = PG_owner_priv_1,

	/*
	 * Two page bits are conscripted by FS-Cache to maintain local caching
	 * state. These bits are set on pages belonging to the netfs's inodes
	 * when those inodes are being locally cached.
	 */
	PG_fscache = PG_private_2,	/* page backed by cache */

	/* XEN */
	/* Pinned in Xen as a read-only pagetable page. */
	PG_pinned = PG_owner_priv_1,
	/* Pinned as part of domain save (see xen_mm_pin_all()). */
	PG_savepinned = PG_dirty,
	/* Has a grant mapping of another (foreign) domain's page. */
	PG_foreign = PG_owner_priv_1,

	/* SLOB */
	PG_slob_free = PG_private,
};

Linux内核为每个物理页面分配一个page数据结构,采用mem_map[]数组形式来存储这些page数据结构,并且它们和物理页面是一对一映射关系。struct page数据结构和物理页面对应关系视图如下:

page数据结构大小通常几十个字节,而且一个物理页面是4096字节,假设page数据占用40字节?

查看全文
大家还看了
也许喜欢
更多游戏

Copyright © 2024 妖气游戏网 www.17u1u.com All Rights Reserved