struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info		thread_info;	/* thread descriptor */
#endif
	/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long			state;		/* state of the process */
	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void				*stack;
	atomic_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;

#ifdef CONFIG_SMP
	struct llist_node		wake_entry;
	int				on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
	unsigned int			cpu;
#endif
	unsigned int			wakee_flips;
	unsigned long			wakee_flip_decay_ts;
	struct task_struct		*last_wakee;

	/*
	 * recent_used_cpu is initially set as the last CPU used by a task
	 * that wakes affine another task. Waker/wakee relationships can
	 * push tasks around a CPU where each wakeup moves to the next one.
	 * Tracking a recently used CPU allows a quick search for a recently
	 * used CPU that may be idle.
	 */
	int				recent_used_cpu;
	int				wake_cpu;
#endif
	int				on_rq;

	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;

#ifdef SPLIT_RSS_COUNTING
	struct task_rss_stat		rss_stat;
#endif
	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;

	/* Scheduler bits, serialized by scheduler locks: */
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;
	unsigned			sched_remote_wakeup:1;
	/* Force alignment to the next boundary: */
	unsigned			:0;
/* Unserialized, strictly 'current' */
	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
	unsigned			restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
	unsigned			in_user_fault:1;
#ifdef CONFIG_MEMCG_KMEM
	unsigned			memcg_kmem_skip_account:1;
#endif
#endif
#ifdef CONFIG_COMPAT_BRK
	unsigned			brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS
	/* disallow userland-initiated cgroup migration */
	unsigned			no_cgroup_migration:1;
#endif
#ifdef CONFIG_BLK_CGROUP
	/* to be used once the psi infrastructure lands upstream. */
	unsigned			use_memdelay:1;
#endif

#ifdef CONFIG_STACKPROTECTOR
	/* Canary value for the -fstack-protector GCC feature: */
	unsigned long			stack_canary;
#endif
	/*
	 * Pointers to the (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively. (p->father can be replaced with
	 * p->real_parent->pid)
	 */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;

	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;

	/* Thread group tracking: */
	u32				parent_exec_id;
	u32				self_exec_id;

	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;

	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;

	struct wake_q_node		wake_q;

#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task: */
	struct rb_root_cached		pi_waiters;
	/* Updated under owner's pi_lock and rq lock */
	struct task_struct		*pi_top_task;
	/* Deadlock detection and priority inheritance handling: */
	struct rt_mutex_waiter		*pi_blocked_on;
#endif
	struct task_io_accounting	ioac;
#ifdef CONFIG_TASK_XACCT
	/* Accumulated RSS usage: */
	u64				acct_rss_mem1;
	/* Accumulated virtual memory usage: */
	u64				acct_vm_mem1;
	/* stime + utime since last update: */
	u64				acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS
	/* Protected by ->alloc_lock: */
	nodemask_t			mems_allowed;
	/* Sequence number to catch updates: */
	seqcount_t			mems_allowed_seq;
	int				cpuset_mem_spread_rotor;
	int				cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	/* Control Group info protected by css_set_lock: */
	struct css_set __rcu		*cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
	struct list_head		cg_list;
#endif
#ifdef CONFIG_INTEL_RDT
	u32				closid;
	u32				rmid;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user	*robust_list;
#ifdef CONFIG_COMPAT
	struct compat_robust_list_head __user *compat_robust_list;
#endif
	struct list_head		pi_state_list;
	struct futex_pi_state		*pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
	struct mutex			perf_event_mutex;
	struct list_head		perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
	unsigned long			preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
	/* Protected by alloc_lock: */
	struct mempolicy		*mempolicy;
	short				il_prev;
	short				pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
	int				numa_scan_seq;
	unsigned int			numa_scan_period;
	unsigned int			numa_scan_period_max;
	int				numa_preferred_nid;
	unsigned long			numa_migrate_retry;
	/* Migration stamp: */
	u64				node_stamp;
	u64				last_task_numa_placement;
	u64				last_sum_exec_runtime;
	struct callback_head		numa_work;
	/*
	 * This pointer is only modified for current in syscall and
	 * pagefault context (and for tasks being destroyed), so it can be read
	 * from any of the following contexts:
	 *  - RCU read-side critical section
	 *  - current->numa_group from everywhere
	 *  - task's runqueue locked, task not running
	 */
	struct numa_group __rcu		*numa_group;

	/*
	 * numa_faults is an array split into four regions:
	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
	 * in this precise order.
	 *
	 * faults_memory: Exponential decaying average of faults on a per-node
	 * basis. Scheduling placement decisions are made based on these
	 * counts. The values remain static for the duration of a PTE scan.
	 * faults_cpu: Track the nodes the process was running on when a NUMA
	 * hinting fault was incurred.
	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
	 * during the current scan window. When the scan completes, the counts
	 * in faults_memory and faults_cpu decay and these values are copied.
	 */
	unsigned long			*numa_faults;
	unsigned long			total_numa_faults;
	/*
	 * numa_faults_locality tracks if faults recorded during the last
	 * scan window were remote/local or failed to migrate. The task scan
	 * period is adapted based on the locality of the faults with different
	 * weights depending on whether they were shared or private faults
	 */
	unsigned long			numa_faults_locality[3];
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_RSEQ
	struct rseq __user		*rseq;
	u32				rseq_len;
	u32				rseq_sig;
	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long			rseq_event_mask;
#endif

	struct tlbflush_unmap_batch	tlb_ubc;

	struct rcu_head			rcu;

	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;

#ifdef CONFIG_FAULT_INJECTION
	int				make_it_fail;
	unsigned int			fail_nth;
#endif
	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;

#ifdef CONFIG_LATENCYTOP
	int				latency_record_count;
	struct latency_record		latency_record[LT_SAVECOUNT];
#endif
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;

#ifdef CONFIG_UPROBES
	struct uprobe_task		*utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
	unsigned int			sequential_io;
	unsigned int			sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
	unsigned long			task_state_change;
#endif
	int				pagefault_disabled;
#ifdef CONFIG_MMU
	struct task_struct		*oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACK
	struct vm_struct		*stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* A live task holds one reference: */
	atomic_t			stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
	int				patch_state;
#endif
#ifdef CONFIG_SECURITY
	/* Used by LSM modules for access restriction: */
	void				*security;
#endif
	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
	randomized_struct_fields_end

	/* CPU-specific state of this task: */
	struct thread_struct		thread;

	/*
	 * WARNING: on x86, 'thread_struct' contains a variable-sized
	 * structure. It *MUST* be at the end of 'task_struct'.
	 *
	 * Do not put anything below here!
	 */
};
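The comment at the top of the listing explains why thread_info must be the first member: with CONFIG_THREAD_INFO_IN_TASK, the kernel recovers the thread_info of the running task by simply casting the task_struct pointer. The definition in include/linux/thread_info.h (v4.19) boils down to a plain cast:

#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * thread_info sits at offset 0 of task_struct, so a task_struct
 * pointer can be reinterpreted as a thread_info pointer directly.
 */
#define current_thread_info() ((struct thread_info *)current)
#endif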
/* /include/linux/cred.h */

/*
 * The security context of a task
 *
 * The parts of the context break down into two categories:
 *
 *  (1) The objective context of a task. These parts are used when some other
 *	task is attempting to affect this one.
 *
 *  (2) The subjective context. These details are used when the task is acting
 *	upon another object, be that a file, a task, a key or whatever.
 *
 * Note that some members of this structure belong to both categories - the
 * LSM security pointer for instance.
 *
 * A task has two security pointers. task->real_cred points to the objective
 * context that defines that task's actual details. The objective part of this
 * context is used whenever that task is acted upon.
 *
 * task->cred points to the subjective context that defines the details of how
 * that task is going to act upon another object. This may be overridden
 * temporarily to point to another security context, but normally points to the
 * same context as task->real_cred.
 */
struct cred {
	atomic_t	usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
	atomic_t	subscribers;	/* number of processes subscribed */
	void		*put_addr;
	unsigned	magic;
#define CRED_MAGIC	0x43736564
#define CRED_MAGIC_DEAD	0x44656144
#endif
	kuid_t		uid;		/* real UID of the task */
	kgid_t		gid;		/* real GID of the task */
	kuid_t		suid;		/* saved UID of the task */
	kgid_t		sgid;		/* saved GID of the task */
	kuid_t		euid;		/* effective UID of the task */
	kgid_t		egid;		/* effective GID of the task */
	kuid_t		fsuid;		/* UID for VFS ops */
	kgid_t		fsgid;		/* GID for VFS ops */
	unsigned	securebits;	/* SUID-less security management */
	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
	kernel_cap_t	cap_permitted;	/* caps we're permitted */
	kernel_cap_t	cap_effective;	/* caps we can actually use */
	kernel_cap_t	cap_bset;	/* capability bounding set */
	kernel_cap_t	cap_ambient;	/* Ambient capability set */
#ifdef CONFIG_KEYS
	unsigned char	jit_keyring;	/* default keyring to attach requested
					 * keys to */
	struct key __rcu *session_keyring; /* keyring inherited over fork */
	struct key	*process_keyring; /* keyring private to this process */
	struct key	*thread_keyring; /* keyring private to this thread */
	struct key	*request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
	void		*security;	/* subjective LSM security */
#endif
	struct user_struct *user;	/* real user ID subscription */
	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
	/* RCU deletion */
	union {
		int non_rcu;		/* Can we skip RCU deletion? */
		struct rcu_head	rcu;	/* RCU deletion hook */
	};
} __randomize_layout;
real_cred vs. cred

Based on the comments in the source, my understanding of these two fields is as follows:
task->real_cred points to the objective context that defines that task’s actual details. The objective part of this context is used whenever that task is acted upon.
task->cred points to the subjective context that defines the details of how that task is going to act upon another object. This may be overridden temporarily to point to another security context, but normally points to the same context as task->real_cred.
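In short: real_cred is what the task really is (consulted when other tasks act on it), while cred is the identity the task is currently acting with. The "overridden temporarily" case the comment mentions is what the override_creds()/revert_creds() pair implements: only task->cred is swapped, and task->real_cred never changes. A minimal sketch of that pattern in kernel context (the fsuid tweak is just an illustrative choice, not from the listing above):

	const struct cred *old_cred;
	struct cred *tmp_cred;

	tmp_cred = prepare_creds();			/* writable copy of current->cred */
	if (tmp_cred) {
		tmp_cred->fsuid = GLOBAL_ROOT_UID;	/* act as root for VFS ops only */
		old_cred = override_creds(tmp_cred);	/* swaps current->cred, not real_cred */
		/* ... do filesystem work under the temporary subjective context ... */
		revert_creds(old_cred);			/* restore current->cred */
		put_cred(tmp_cred);			/* drop our reference */
	}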
/* /kernel/cred.c */

/**
 * prepare_kernel_cred - Prepare a set of credentials for a kernel service
 * @daemon: A userspace daemon to be used as a reference
 *
 * Prepare a set of credentials for a kernel service. This can then be used to
 * override a task's own credentials so that work can be done on behalf of that
 * task that requires a different subjective context.
 *
 * @daemon is used to provide a base for the security record, but can be NULL.
 * If @daemon is supplied, then the security data will be derived from that;
 * otherwise they'll be set to 0 and no groups, full capabilities and no keys.
 *
 * The caller may change these controls afterwards if desired.
 *
 * Returns the new credentials or NULL if out of memory.
 *
 * Does not take, and does not return holding current->cred_replace_mutex.
 */
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
	const struct cred *old;
	struct cred *new;

	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
	if (!new)
		return NULL;

	kdebug("prepare_kernel_cred() alloc %p", new);
	if (daemon)
		old = get_task_cred(daemon);
	else
		old = get_cred(&init_cred);
	/* ... */
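A quick usage sketch of the two call forms the comment describes; some_daemon here is a hypothetical task pointer, not something defined above:

	/* Derive the security data from a reference daemon task: */
	struct cred *dcred = prepare_kernel_cred(some_daemon);

	/* With NULL: UID/GID 0, no groups, full capabilities, no keys,
	 * all copied from init_cred: */
	struct cred *rcred = prepare_kernel_cred(NULL);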
/* /kernel/cred.c */

/**
 * commit_creds - Install new credentials upon the current task
 * @new: The credentials to be assigned
 *
 * Install a new set of credentials to the current task, using RCU to replace
 * the old set. Both the objective and the subjective credentials pointers are
 * updated. This function may not be called if the subjective credentials are
 * in an overridden state.
 *
 * This function eats the caller's reference to the new credentials.
 *
 * Always returns 0 thus allowing this function to be tail-called at the end
 * of, say, sys_setgid().
 */
int commit_creds(struct cred *new)
{
	struct task_struct *task = current;
	const struct cred *old = task->real_cred;

	get_cred(new); /* we will require a ref for the subj creds too */

	/* dumpability changes */
	if (!uid_eq(old->euid, new->euid) ||
	    !gid_eq(old->egid, new->egid) ||
	    !uid_eq(old->fsuid, new->fsuid) ||
	    !gid_eq(old->fsgid, new->fsgid) ||
	    !cred_cap_issubset(old, new)) {
		if (task->mm)
			set_dumpable(task->mm, suid_dumpable);
		task->pdeath_signal = 0;
		/*
		 * If a task drops privileges and becomes nondumpable,
		 * the dumpability change must become visible before
		 * the credential change; otherwise, a __ptrace_may_access()
		 * racing with this change may be able to attach to a task it
		 * shouldn't be able to attach to (as if the task had dropped
		 * privileges without becoming nondumpable).
		 * Pairs with a read barrier in __ptrace_may_access().
		 */
		smp_wmb();
	}

	/* alter the thread keyring */
	if (!uid_eq(new->fsuid, old->fsuid))
		key_fsuid_changed(task);
	if (!gid_eq(new->fsgid, old->fsgid))
		key_fsgid_changed(task);
	/* do it
	 * RLIMIT_NPROC limits on user->processes have already been checked
	 * in set_user().
	 */
	alter_cred_subscribers(new, 2);
	if (new->user != old->user)
		atomic_inc(&new->user->processes);
	rcu_assign_pointer(task->real_cred, new);
	rcu_assign_pointer(task->cred, new);
	if (new->user != old->user)
		atomic_dec(&old->user->processes);
	alter_cred_subscribers(old, -2);
	/* ... */
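Note the two rcu_assign_pointer() lines: commit_creds() installs @new on both real_cred and cred of the current task. That is why the two functions are commonly chained when kernel-side code needs the current task to hold full root credentials permanently. A sketch of the idea (this call chain is not part of the kernel source above):

	struct cred *kcred = prepare_kernel_cred(NULL);	/* root creds from init_cred */
	if (kcred)
		commit_creds(kcred);	/* installs them on real_cred and cred */

Because commit_creds() "eats" the caller's reference, no put_cred() is needed on success.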