[TOC]

include/linux/ctype.h Linux 内核字符类型判断与转换 (ctype.h)

本代码片段是 Linux 内核自定义的 ctype.h 头文件，其核心功能是提供一套高效、独立且内核安全的字符类型判断（如 isalpha, isdigit）与大小写转换（如 tolower, toupper）的宏和内联函数。它通过一个预计算的查找表（Lookup Table）实现了极高的执行效率，并避免了对标准 C 库的依赖，这对于保持内核的自包含性和性能至关重要。

实现原理分析

此 ctype 实现的核心是一种经典的、以空间换时间的优化技巧：基于数组查找表的位掩码（bitmask）分类法。

查找表与位掩码:
- 代码预先定义了一个外部数组 _ctype[]。这个数组共有 256 个元素，对应 unsigned char 的全部 256 个取值：0–127 是 ASCII 字符，128–255 是扩展字符（表中对 160–255 的分类与 ISO-8859-1/Latin-1 的字母、标点布局一致）。
- 数组中的每个元素是一个 unsigned char，其本身是一个位掩码。掩码中的每一位（bit）代表一种特定的字符属性，例如 _U (0x01) 代表大写字母，_L (0x02) 代表小写字母，_D (0x04) 代表数字，等等。如果一个字符同时具备多种属性（如 ‘A’ 既是大写字母 _U 又是十六进制数 _X），其在 _ctype 表中对应的元素的各位就会被相应地设置（_ctype['A'] 的值会是 _U | _X）。
高效的类型判断:
- 基于查找表的 is...() 判断宏，如 isalnum(c)，都被展开为 ((_ctype[(int)(unsigned char)(c)] & (_U|_L|_D)) != 0) 这种形式（isdigit、isascii 和 isodigit 是例外，它们不查表，而是直接做范围比较或内建函数调用）。
- 这个表达式的执行过程是：
  a. 将输入字符 c 强制转换为 unsigned char，再转换为 int，以确保它能作为一个合法的、范围在 0 到 255 之间的数组索引。
  b. 通过 _ctype[...] 直接从查找表中取出该字符的属性位掩码。
  c. 将取出的掩码与该判断函数关心的属性组合（如 isalnum 关心 _U|_L|_D）进行按位与（&）运算。
  d. 如果运算结果不为零，说明该字符至少具备所关心属性中的一种，判断为真。
- 这种方法将传统 if-else 链或复杂的范围比较，转化为一次内存访问和一次位运算，其执行速度极快且是常量时间。
编译器内建函数优化: isdigit(c) 的实现展示了对性能的极致追求。它优先使用 __has_builtin(__builtin_isdigit) 来检查编译器是否提供了更高性能的内建实现。如果有，就直接使用；如果没有，才退化为 return '0' <= c && c <= '9'; 这种标准的范围比较。

代码分析

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_CTYPE_H
#define _LINUX_CTYPE_H

#include <linux/compiler.h>

/*
 * NOTE! This ctype implementation does not handle EOF the way the
 * standard C library is required to.
 */

#define _U	0x01	/**< attribute bit: upper-case letter */
#define _L	0x02	/**< attribute bit: lower-case letter */
#define _D	0x04	/**< attribute bit: decimal digit */
#define _C	0x08	/**< attribute bit: control character */
#define _P	0x10	/**< attribute bit: punctuation */
#define _S	0x20	/**< attribute bit: white space (space/newline/tab) */
#define _X	0x40	/**< attribute bit: hexadecimal digit letter */
#define _SP	0x80	/**< attribute bit: hard space (0x20) */

/**
 * @brief Character attribute lookup table.
 *
 * Indexed by character value; each entry is a bitmask built from the
 * _U/_L/_D/_C/_P/_S/_X/_SP attribute bits. Only declared here; the
 * definition lives elsewhere.
 */
extern const unsigned char _ctype[];

/**
 * @brief Fetch the attribute bitmask for character @p x.
 *
 * The argument is narrowed to unsigned char before indexing, so the index
 * is never negative even when @p x is a signed char with the high bit set.
 */
#define __ismask(x) (_ctype[(int)(unsigned char)(x)])

/** @brief True if @p c is a letter or a decimal digit. */
#define isalnum(c)	((__ismask(c)&(_U|_L|_D)) != 0)
/** @brief True if @p c is a letter (upper or lower case). */
#define isalpha(c)	((__ismask(c)&(_U|_L)) != 0)
/** @brief True if @p c is a control character. */
#define iscntrl(c)	((__ismask(c)&(_C)) != 0)
/** @brief True if @p c is printable, excluding the space character. */
#define isgraph(c)	((__ismask(c)&(_P|_U|_L|_D)) != 0)
/** @brief True if @p c is a lower-case letter. */
#define islower(c)	((__ismask(c)&(_L)) != 0)
/** @brief True if @p c is printable, including the space character. */
#define isprint(c)	((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
/** @brief True if @p c is punctuation. */
#define ispunct(c)	((__ismask(c)&(_P)) != 0)
/* Note: isspace() must return false for the %NUL string terminator. */
/** @brief True if @p c is white space. */
#define isspace(c)	((__ismask(c)&(_S)) != 0)
/** @brief True if @p c is an upper-case letter. */
#define isupper(c)	((__ismask(c)&(_U)) != 0)
/** @brief True if @p c is a hexadecimal digit (decimal digit or _X letter). */
#define isxdigit(c)	((__ismask(c)&(_D|_X)) != 0)

/** @brief True if @p c, taken as an unsigned char, is in the range 0..0x7f. */
#define isascii(c) (((unsigned char)(c))<=0x7f)
/** @brief Map @p c into the range 0..0x7f by masking off the top bit. */
#define toascii(c) (((unsigned char)(c))&0x7f)

#if __has_builtin(__builtin_isdigit)
/** @brief Test whether @p c is a decimal digit via the compiler builtin. */
#define  isdigit(c) __builtin_isdigit(c)
#else
/**
 * @brief Test whether @p c is a decimal digit ('0'..'9').
 * @param c Character value to classify.
 * @return Non-zero when @p c is a digit, 0 otherwise.
 *
 * Used only when the compiler does not provide __builtin_isdigit.
 */
static inline int isdigit(int c)
{
	/* Reject everything outside the closed range ['0', '9']. */
	return !(c < '0' || c > '9');
}
#endif

/**
 * @brief Convert @p c to lower case.
 * @param c Character to convert.
 * @return The lower-case counterpart when isupper() classifies @p c as an
 *         upper-case letter; otherwise @p c unchanged.
 */
static inline unsigned char __tolower(unsigned char c)
{
	/* Upper and lower case differ by the constant offset 'a' - 'A'. */
	return isupper(c) ? (unsigned char)(c - ('A' - 'a')) : c;
}

/**
 * @brief Convert @p c to upper case.
 * @param c Character to convert.
 * @return The upper-case counterpart when islower() classifies @p c as a
 *         lower-case letter; otherwise @p c unchanged.
 */
static inline unsigned char __toupper(unsigned char c)
{
	/* Upper and lower case differ by the constant offset 'a' - 'A'. */
	return islower(c) ? (unsigned char)(c - ('a' - 'A')) : c;
}

/** @brief Convert a character to lower case (public name for __tolower()). */
#define tolower(c) __tolower(c)
/** @brief Convert a character to upper case (public name for __toupper()). */
#define toupper(c) __toupper(c)

/*
 * Fast implementation of tolower() for internal usage. Do not use in your
 * code.
 */
/**
 * @brief Lower-case @p c without checking that it is a letter.
 * @param c Character expected to be an upper-case letter.
 * @return @p c with bit 0x20 set.
 * @note No classification is done: bit 0x20 is set unconditionally, so
 *       the result is only meaningful for letter input.
 */
static inline char _tolower(const char c)
{
	const int lowered = c | 0x20;

	return lowered;
}

/**
 * @brief Test whether @p c is an octal digit ('0'..'7').
 * @param c Character to classify.
 * @return 1 when @p c is an octal digit, 0 otherwise.
 */
static inline int isodigit(const char c)
{
	/* Reject everything outside the closed range ['0', '7']. */
	return !(c < '0' || c > '7');
}

#endif

lib/ctype.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/lib/ctype.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/ctype.h>
#include <linux/compiler.h>
#include <linux/export.h>

/*
 * Character attribute table, one bitmask entry per character value 0..255.
 *
 * Entries 0-127 classify the ASCII range. Entries 128-159 carry no
 * attributes. Entries 160-255 mark 160 as white space, 161-191 as
 * punctuation, and 192-255 as upper/lower-case letters except 215 and 247,
 * which are punctuation (this layout looks like ISO-8859-1 -- confirm
 * before relying on it).
 * Entry 0 (%NUL) has only _C set, so isspace('\0') is false.
 */
const unsigned char _ctype[] = {
_C,_C,_C,_C,_C,_C,_C,_C,				/* 0-7 */
_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,			/* 8-15 */
_C,_C,_C,_C,_C,_C,_C,_C,				/* 16-23 */
_C,_C,_C,_C,_C,_C,_C,_C,				/* 24-31 */
_S|_SP,_P,_P,_P,_P,_P,_P,_P,				/* 32-39 */
_P,_P,_P,_P,_P,_P,_P,_P,				/* 40-47 */
_D,_D,_D,_D,_D,_D,_D,_D,				/* 48-55 */
_D,_D,_P,_P,_P,_P,_P,_P,				/* 56-63 */
_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,		/* 64-71 */
_U,_U,_U,_U,_U,_U,_U,_U,				/* 72-79 */
_U,_U,_U,_U,_U,_U,_U,_U,				/* 80-87 */
_U,_U,_U,_P,_P,_P,_P,_P,				/* 88-95 */
_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,		/* 96-103 */
_L,_L,_L,_L,_L,_L,_L,_L,				/* 104-111 */
_L,_L,_L,_L,_L,_L,_L,_L,				/* 112-119 */
_L,_L,_L,_P,_P,_P,_P,_C,				/* 120-127 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 128-143 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 144-159 */
_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 160-175 */
_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 176-191 */
_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,	/* 192-207 */
_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,	/* 208-223 */
_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,	/* 224-239 */
_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};	/* 240-255 */

EXPORT_SYMBOL(_ctype);

include/linux/stringify.h 字符串化

__stringify 参数字符串化

__stringify 是一个宏，用于将参数转换为字符串。它通常用于将宏参数转换为字符串，以便在编译时生成字符串常量。这个宏在内核开发中非常有用，尤其是在需要生成调试信息或错误消息时。

/*
 * Indirect stringification. Going through two macro levels lets the
 * argument be macro-expanded before it is turned into a string literal,
 * so __stringify(__LINE__) yields the line number rather than "__LINE__".
 * The named variadic parameter (x...) lets arguments containing commas be
 * stringified as a whole.
 */
#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

FILE_LINE 文件名和行号

/*
 * "file:line" string literal for the location where the macro is used,
 * built by concatenating __FILE__, ":" and the stringified __LINE__.
 */
#define FILE_LINE __FILE__ ":" __stringify(__LINE__)

include/linux/compiler.h

unreachable

/*
 * Mark a position in code as unreachable.  This can be used to
 * suppress control flow warnings after asm blocks that transfer
 * control elsewhere.
 */
#define unreachable() do {		\
	barrier_before_unreachable();	\
	__builtin_unreachable();	\
} while (0)

__must_be_array 检查是否为数组

/*
 * &(a)[0] degrades an array to a pointer, which is a different type from
 * the array itself. If a is already a pointer, &(a)[0] is still a pointer
 * and has the same type as a.
 */
/*
 * __is_array(a) is therefore true only when a and &(a)[0] differ in type
 * (assuming __same_type() reports type compatibility -- its definition is
 * not shown here).
 */
#define __is_array(a)		(!__same_type((a), &(a)[0]))
/*
 * Passes the "not an array" condition to __BUILD_BUG_ON_ZERO_MSG() with
 * the message "must be array"; presumably this breaks the build for
 * non-array arguments and evaluates to 0 otherwise -- confirm against the
 * helper's definition.
 */
#define __must_be_array(a)	__BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \
							"must be array")

include/linux/string.h

strscpy_pad() - 将 C 字符串复制到指定大小的缓冲区中

/**
 * strscpy_pad() - Copy a C-string into a sized buffer
 * @dst: Where to copy the string to
 * @src: Where to copy the string from
 * @...: Size of destination buffer (optional)
 *
 * Copy the string, or as much of it as fits, into the destination buffer.
 * The behavior is undefined if the string buffers overlap. The destination
 * buffer is always %NUL terminated, unless it is zero-sized.
 *
 * If the source string is shorter than the destination buffer, the
 * remaining bytes in the buffer are filled with %NUL bytes.
 *
 * For a full explanation of why you may want to consider using the
 * 'strscpy' functions, see the function docstring for strscpy().
 *
 * Dispatch: COUNT_ARGS() turns the optional arguments into a count and
 * CONCATENATE() pastes that count onto __strscpy_pad, selecting
 * __strscpy_pad0 (no size given) or __strscpy_pad1 (explicit size).
 *
 * This explanation must stay outside the #define: a multi-line comment
 * placed after the continuation backslash ends the directive at the
 * comment's closing line, leaving the macro body empty.
 *
 * Returns:
 * * The number of characters copied (not including the trailing %NUL)
 * * -E2BIG if count is 0 or @src was truncated.
 */
#define strscpy_pad(dst, src, ...)	\
	CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)

/*
 * Size-less form: the destination size is taken from sizeof(dst).
 * The __must_be_array()/__must_be_cstr() terms are added to the size;
 * presumably they are compile-time checks that evaluate to 0 -- confirm
 * against their definitions.
 */
#define __strscpy_pad0(dst, src, ...)	\
	sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst) +	\
				    __must_be_cstr(dst) + __must_be_cstr(src))
/*
 * Explicit-size form: the caller-supplied size is used, with the same
 * __must_be_cstr() terms added for dst and src.
 */
#define __strscpy_pad1(dst, src, size)	\
	sized_strscpy_pad(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src))

/*
 * sized_strscpy_pad() - copy a C-string with sized_strscpy() and zero-fill
 * the rest of the destination.
 * @dest:  destination buffer
 * @src:   source string
 * @count: size of the destination buffer in bytes
 *
 * Each argument is evaluated exactly once into a local. When the copy
 * succeeds and wrote fewer than @count characters, the bytes after the
 * terminating NUL, up to the end of the buffer, are cleared with memset().
 * On a negative result from sized_strscpy() no padding is done.
 *
 * Evaluates to the value returned by sized_strscpy().
 *
 * This explanation must stay outside the #define: a multi-line comment
 * without a trailing backslash on its last line ends the macro early.
 */
#define sized_strscpy_pad(dest, src, count)	({			\
	char *__dst = (dest);						\
	const char *__src = (src);					\
	const size_t __count = (count);					\
	ssize_t __wrote;						\
									\
	__wrote = sized_strscpy(__dst, __src, __count);			\
	if (__wrote >= 0 && __wrote < __count)				\
		memset(__dst + __wrote + 1, 0, __count - __wrote - 1);	\
	__wrote;							\
})

lib/string_helpers.c

skip_spaces 跳过空格

/**
 * skip_spaces - Skip over leading whitespace in @str.
 * @str: The string to scan.
 *
 * The string itself is not modified; only the returned pointer advances.
 *
 * Returns a pointer to the first character of @str for which isspace()
 * is false (this may be the terminating NUL).
 */
char *skip_spaces(const char *str)
{
	const char *cursor;

	for (cursor = str; isspace(*cursor); cursor++)
		;
	return (char *)cursor;
}
EXPORT_SYMBOL(skip_spaces);

lib/string.c

sized_strscpy - 将 C 字符串复制到指定大小的缓冲区中

/*
 * sized_strscpy - copy a C-string into a buffer of @count bytes.
 * @dest:  destination buffer
 * @src:   source string
 * @count: size of the destination buffer
 *
 * Copies a word at a time while the alignment/page constraints below allow
 * it, then finishes byte by byte. The destination is NUL-terminated on
 * every path that writes to it.
 *
 * Returns the number of characters copied (not counting the trailing NUL),
 * or -E2BIG when @count is 0, @count exceeds INT_MAX, or @src did not fit.
 */
ssize_t sized_strscpy(char *dest, const char *src, size_t count)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	size_t max = count;
	long res = 0;

	if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
		return -E2BIG;

#ifndef CONFIG_DCACHE_WORD_ACCESS
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	/*
	 * If src is unaligned, don't cross a page boundary,
	 * since we don't know if the next page is mapped.
	 */
	if ((long)src & (sizeof(long) - 1)) {
		size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1));
		if (limit < max)
			max = limit;
	}
#else
	/* If src or dest is unaligned, don't do word-at-a-time. */
	if (((long) dest | (long) src) & (sizeof(long) - 1))
		max = 0;
#endif
#endif

	/*
	 * load_unaligned_zeropad() or read_word_at_a_time() below may read
	 * uninitialized bytes after the trailing zero and use them in
	 * comparisons. Disable this optimization under KMSAN to prevent
	 * false positive reports.
	 */
	if (IS_ENABLED(CONFIG_KMSAN))
		max = 0;

	while (max >= sizeof(unsigned long)) {
		unsigned long c, data;

#ifdef CONFIG_DCACHE_WORD_ACCESS
		c = load_unaligned_zeropad(src+res);
#else
		c = read_word_at_a_time(src+res);
#endif
		/* Check whether this word contains a NUL byte. */
		if (has_zero(c, &data, &constants)) {
			/* Mark the position of the NUL byte. */
			data = prep_zero_mask(c, data, &constants);
			/* Build the final byte mask. */
			data = create_zero_mask(data);
			/*
			 * Keep only the bytes of c that precede the NUL and
			 * store the result into dest at offset res.
			 */
			*(unsigned long *)(dest+res) = c & zero_bytemask(data);
			return res + find_zero(data);
		}
		count -= sizeof(unsigned long);
		/* count reaching zero means the destination is full. */
		if (unlikely(!count)) {
			/*
			 * Mask the word before storing it; presumably
			 * ALLBUTLAST_BYTE_MASK clears the final byte so the
			 * truncated result still ends in NUL -- confirm
			 * against the macro's definition.
			 */
			c &= ALLBUTLAST_BYTE_MASK;
			*(unsigned long *)(dest+res) = c;
			return -E2BIG;
		}
		*(unsigned long *)(dest+res) = c;
		res += sizeof(unsigned long);
		max -= sizeof(unsigned long);
	}

	/*
	 * Byte-at-a-time tail: copy src into dest one character at a time,
	 * stopping as soon as the terminating NUL has been copied, and
	 * leaving one byte of room for the forced terminator below.
	 */
	while (count > 1) {
		char c;

		c = src[res];
		dest[res] = c;
		if (!c)
			return res;
		res++;
		count--;
	}

	/* Force NUL-termination. */
	dest[res] = '\0';

	/* Return E2BIG if the source didn't stop */
	return src[res] ? -E2BIG : res;
}
EXPORT_SYMBOL(sized_strscpy);