这里主要想说明:所有类型和数据都是由字节构成的
int[]转换char*逐字节操作
int arr[5];
memset(arr, 0, sizeof(arr)); //sizeof(arr) = sizeof(int)*5 = 20
unsigned char* chPtr = (unsigned char*)arr; //char是单字节类型,用chPtr存放arr的首地址。char的范围是-128~127,所以用unsigned char
chPtr[0] = 0b11111111; //0xff
chPtr[1] = 0b11111111; //0xff
chPtr[2] = 0b11111111; //0xff
chPtr[3] = 0b01111111; //0x7f,高位字节在高地址,这是小端序(little endian)
cout << arr[0] << endl; //2147483647,有符号int的最大值,十六进制:0x7fffffff
cout << int(chPtr[0]) << "," << int(chPtr[1]) << "," << int(chPtr[2]) << "," << int(chPtr[3]) << endl; // 255, 255, 255, 127
//修改arr[1]的值
chPtr[4] = 170;
chPtr[5] = 185;
chPtr[6] = 101;
chPtr[7] = 0;
cout << arr[1] << endl; //6666666
cout << int(chPtr[4]) << "," << int(chPtr[5]) << "," << int(chPtr[6]) << "," << int(chPtr[7]) << endl; // 170, 185, 101, 0(MacOS)long转换成int*和char*
64位macOS中long占8个字节(Windows中long只占4个字节)
// Splits a long into its raw bytes and into two 4-byte halves, then prints both views.
// The sample output in the comments assumes an 8-byte long stored little-endian
// (e.g. 64-bit macOS).
int main() {
    long l = LONG_MAX;
    // unsigned char may alias any object, so this byte view is well-defined.
    unsigned char* p2 = (unsigned char*)&l;

    // Copy the bytes into two unsigned ints instead of dereferencing an unsigned int*
    // aimed at the long: that cast breaks the aliasing rules, and its second element
    // lies outside the object wherever long is only 4 bytes wide.
    unsigned int halves[2] = {0, 0};
    unsigned char* dst = (unsigned char*)halves;
    for (size_t i = 0; i < sizeof(l) && i < sizeof(halves); i++) {
        dst[i] = p2[i];
    }
    cout << halves[0] << "," << halves[1] << endl; // 4294967295,2147483647
    // Low-address half = 4294967295 (0xffffffff), high-address half = 2147483647 (0x7fffffff),
    // so l == 2147483647 * 4294967296 + 4294967295 == 9223372036854775807.

    // Bound the loop by sizeof(l) rather than a literal 8 so it never leaves the object.
    for (size_t i = 0; i < sizeof(l); i++) {
        cout << int(p2[i]) << ","; // 255,255,255,255,255,255,255,127,
    }
    cout << endl; // 256^7 * 128 - 1 == 2^63 - 1 == 9223372036854775807 == LONG_MAX
    return 0;
}
// Section: strings
GBK
// Dumps the bytes of a narrow string holding GBK text.
// NOTE(review): the byte values below assume the compiler stores narrow literals as GBK
// (e.g. MSVC on a Chinese-locale Windows) — confirm for your toolchain.
string s1 = "你好,世界。"; // the comma is the full-width U+FF0C (GBK A3 AC), matching the dump below
const unsigned char* p1 = (const unsigned char*)s1.data();
size_t len = s1.length(); // 12: six characters, two bytes each
for (size_t i = 0; i < len; i++) {
    cout << int(p1[i]) << ","; // 196,227,186,195,163,172,202,192,189,231,161,163,
}
cout << endl;
// 196 and 227 are not representable in a signed char, so brace-initialising a char
// array with them is a narrowing conversion; hex escapes yield the same bytes.
char c1[3] = {'\xC4', '\xE3', '\0'}; // C4 E3
cout << c1 << endl; // prints 你 when the console uses GBK
// Section: UTF-8
UTF-8是可变长度的字符编码
// Dumps the bytes of a UTF-8 string; UTF-8 uses three bytes for each of these characters.
SetConsoleOutputCP(CP_UTF8); // switch the console output code page to UTF-8
// The u8 prefix stores the literal as UTF-8 instead of the Windows default (GBK).
// The comma is the full-width U+FF0C (EF BC 8C), matching the dump below.
// NOTE(review): from C++20 a u8 literal has char8_t elements and no longer
// initialises std::string directly — confirm the language standard in use.
string s1 = u8"你好,世界。";
const unsigned char* p1 = (const unsigned char*)s1.data();
size_t len = s1.length(); // 18: six characters, three bytes each
for (size_t i = 0; i < len; i++) {
    cout << int(p1[i]) << ","; // 228,189,160,229,165,189,239,188,140,228,184,150,231,149,140,227,128,130,
}
cout << endl;
// 228, 189 and 160 are not representable in a signed char, so brace-initialising a char
// array with them is a narrowing conversion; hex escapes yield the same bytes.
char c1[4] = {'\xE4', '\xBD', '\xA0', '\0'}; // E4 BD A0, percent-encoded as %E4%BD%A0
cout << c1 << endl; // prints 你 on a UTF-8 console
// Section: Unicode
在Windows中wchar_t采用UTF-16编码,每个码元占两个字节,所以可以直接用unsigned short(无符号短整型)取出每个码元;基本多文种平面(BMP)之外的字符由两个码元(代理对)组成
// Dumps a wide string first byte by byte, then code unit by code unit.
// The sample output is for Windows, where wchar_t is a 2-byte UTF-16 code unit.
wstring s1 = L"你好1"; // %u4F60%u597D
size_t len = s1.size(); // 3: size() counts wchar_t elements, so the ASCII '1' also counts as one
const unsigned char* c1 = (const unsigned char*)s1.data();
// sizeof(wchar_t) replaces the hard-coded 2 so the dump covers the whole buffer
// even where wchar_t is wider than two bytes.
for (size_t i = 0; i < len * sizeof(wchar_t); i++) {
    cout << short(c1[i]) << ","; // 96,79,125,89,49,0, -> 60 4F 7D 59 31 00; '1' also takes two bytes, high byte 0
}
cout << endl;
// Convert each element by value rather than reading the buffer through an
// unsigned short*, which discards const and accesses wchar_t objects as another type.
for (size_t i = 0; i < len; i++) {
    unsigned short unit = (unsigned short)s1[i];
    cout << unit << ","; // 20320,22909,49, -> 0x4F60, 0x597D: the UTF-16 code units of "你好"
}
cout << endl;