Administrator
Administrator
发布于 2025-09-14 / 11 阅读
0
0

拆解内存和数据类型的关系

本文想说明:任何类型的数据,在内存中都是由字节构成的。

int[]转换char*逐字节操作

	int arr[5];
	memset(arr, 0, sizeof(arr)); //sizeof(arr) = sizeof(int)*5 = 20 
	unsigned char* chPtr = (unsigned char*)arr; //char是单字节类型,用chPtr存放arr的首地址。char的范围是-128~127,所以用unsigned char
	chPtr[0] = 0b11111111; //0xff
	chPtr[1] = 0b11111111; //0xff
	chPtr[2] = 0b11111111; //0xff
	chPtr[3] = 0b01111111; //0x7f,高位字节在高地址,这是小端序(little endian)
	cout << arr[0] << endl; //2147483647,有符号int的最大值,十六进制:0x7fffffff
	cout << int(chPtr[0]) << "," << int(chPtr[1]) << "," << int(chPtr[2]) << "," << int(chPtr[3]) << endl; // 255, 255, 255, 127

	//修改arr[1]的值
	chPtr[4] = 170;
	chPtr[5] = 185;
	chPtr[6] = 101;
	chPtr[7] = 0;
	cout << arr[1] << endl; //6666666
	cout << int(chPtr[4]) << "," << int(chPtr[5]) << "," << int(chPtr[6]) << "," << int(chPtr[7]) << endl; // 170, 185, 101, 0

(MacOS)long转换成int*和char*

在64位macOS中long占8个字节(在Windows中long只占4个字节,下面的代码不适用)。

// Shows how the 8 bytes of a long can be viewed as two 4-byte unsigned ints
// or as eight single bytes. The printed values in the comments assume a
// little-endian machine.
int main() {
    // The walkthrough reads 8 bytes from `l` as two 4-byte words. Refuse to
    // build where long is not 8 bytes (e.g. Windows, where it is 4) instead of
    // silently reading past the end of the object.
    static_assert(sizeof(long) == 8 && sizeof(unsigned int) == 4,
                  "this demo expects an 8-byte long and a 4-byte unsigned int");

    long l = LONG_MAX; // 0x7fffffffffffffff

    // View the long as two 4-byte unsigned ints.
    // NOTE(review): reading a long through unsigned int* is not covered by the
    // C++ aliasing rules; it is kept only to illustrate the memory layout.
    // The unsigned char* view below is always permitted.
    unsigned int* p = reinterpret_cast<unsigned int*>(&l);
    // View the same storage as eight single bytes.
    unsigned char* p2 = reinterpret_cast<unsigned char*>(&l);

    std::cout << *p << "," << *(p + 1) << std::endl; // 4294967295,2147483647
    // The low-address word is the low half (0xffffffff = 4294967295) and the
    // high-address word is the high half (0x7fffffff = 2147483647), so
    // l = 2147483647 * 2^32 + 4294967295 = 9223372036854775807.
    for (unsigned int i = 0; i < sizeof(l); i++) {
        std::cout << int(p2[i]) << ","; // 255,255,255,255,255,255,255,127,
    }
    // Weighting byte i by 256^i: 255*(256^0 + ... + 256^6) + 127*256^7
    // = 2^63 - 1 = 9223372036854775807 = LONG_MAX.
    std::cout << std::endl;

    return 0;
}

字符串

GBK

	string s1 = "你好,世界。";
	unsigned char* p1 = (unsigned char*)s1.data();

	size_t len = s1.length();
	for (int i = 0; i < len; i++) {
		cout << int(p1[i]) << ","; // 196,227,186,195,163,172,202,192,189,231,161,163,
	}
	cout << endl;

	char c1[3] = {196, 227, '\0'}; //C4 E3, \xC4\xE3
	cout << c1 << endl; // 你

UTF-8

UTF-8是可变长度的字符编码:一个字符占1~4个字节,常用汉字占3个字节。

	SetConsoleOutputCP(CP_UTF8); //终端字符集改为UTF-8
	string s1 = u8"你好,世界。"; //windows中从默认GBK改为用UTF-8
	unsigned char* p1 = (unsigned char*)s1.data();

	size_t len = s1.length(); //18
	for (int i = 0; i < len; i++) {
		cout << int(p1[i]) << ","; // 228,189,160,229,165,189,239,188,140,228,184,150,231,149,140,227,128,130, 每三个字节是一个中文
	}
	cout << endl;

	char c1[4] = {228, 189, 160, '\0'}; //E4 BD A0 - > %E4%BD%A0
	cout << c1 << endl; // 你

Unicode

在Windows中wchar_t采用UTF-16编码,占两个字节,所以可以直接用unsigned short(无符号短整型)取出每个码元;基本多文种平面(BMP)内的字符(例如下面的"你好1")每个字正好对应一个码元。

	wstring s1 = L"你好1"; // %u4F60%u597D
	int len = s1.size(); //3,单字节字符和宽字符都算一个

	const unsigned char* c1 = (const unsigned char*)s1.data();
	for (int i = 0; i < len*2; i++) {
		cout << short(c1[i]) << ","; // 96,79,125,89,49,0 -> 60 4F 7D 59 ...,数字1也占了二个字节,高字节是0
	}
	cout << endl;


	unsigned short* c2 = (unsigned short*)s1.data();
	for (int i = 0; i < len; i++) {
		cout << c2[i] << ","; // 20320,22909,49 对应4F60,597D。20320和22909就是"你好"对应的Unicode码元
	}
	cout << endl;


评论