CPython3.7.9源碼學習一：C語言基礎、整數對象

C 語言基礎

結構體

// General form: struct <tag> { <members> };

// Define a struct.
struct Student {
    char name[50];
    int age;
    float score;
};

// Initialise a struct variable.
struct Student stu1;
strcpy(stu1.name, "張三");
stu1.age = 20;
stu1.score = 90.5;

// Initialise an array of structs.
struct Student students[2];
strcpy(students[0].name, "張三");
students[0].age = 20;
students[0].score = 90.5;
strcpy(students[1].name, "李四");
students[1].age = 21;
students[1].score = 85.0;

// Access struct members through a pointer to the struct.
struct Student *ptr = &stu1;
printf("學生名字: %s\n", ptr->name);
printf("學生年齡: %d\n", ptr->age);
printf("學生分數: %.1f\n", ptr->score);

// A struct taken from CPython: the struct behind the long integer object.
struct _longobject {PyObject_VAR_HEADdigit ob_digit[1];
};

typedef

// 用于給數據類型定義別名，常用于給結構體定義別名// 將int別名設置成integer
typedef int integer;  
// 將int的指針設置pinteger
typedef int* pinteger;  // 定義變量
integer a = 100;
pinteger pa = &a;struct Student {  char name[50];  int age;  float score;  
};// 為struct Student 設置一個別名為stu 
typedef struct Student stu; 
// 為struct Student* 設置一個別名為pstu 
typedef struct Student* pstu;  
stu stu1;
stu1.name = "張三";
stu1.age = 20;
stu1.score = 89.5;pstu stu2 = (pstu)malloc(sizeof(Student)); // 申請內存
stu2->name = "張三";
stu2->age = 30;
stu2->score = 95.6;if (stu2 != NULL)
{free(stu2); // 釋放內存
}// cpython中的使用
typedef struct _longobject PyLongObject;

宏定義

// Macros define constants and expressions; preprocessing replaces each use of
// the macro name with the value it was defined as.

// Define the constant PyLong_SHIFT as 30.
#define PyLong_SHIFT    30

// As written in the source:
(digit)1 << PyLong_SHIFT
// After preprocessing:
(digit)1 << 30

預編譯指令

#if 		// 基礎條件判斷
#ifdef 		// 判斷是否已定義某個宏
#ifndef		// 判斷是否未定義某個宏
#else
#elif
#endif
#define		// 宏定義
#undef 		// 取消之前定義的宏
defined		// 運算符（不是預編譯指令），用在 #if / #elif 中判斷宏是否已定義，例如 #if defined(X)

// cpython
/* Choose the digit type and PyLong_SHIFT from PYLONG_BITS_IN_DIGIT. */
#if PYLONG_BITS_IN_DIGIT == 30
typedef uint32_t digit;
#define PyLong_SHIFT    30
#elif PYLONG_BITS_IN_DIGIT == 15
typedef unsigned short digit; 
#define PyLong_SHIFT    15
#else
/* Any other value stops compilation with the message below. */
#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
#endif

assert斷言

// A mechanism for catching program errors while debugging.
#include <assert.h>  
int x = 5;  
assert(x != 0); // assert that x is not equal to 0

goto 語法

// 用于無條件跳轉到程序中的指定標簽int i = 0;  
for (i = 0; i < 10; i++) {  if (i == 5) {  goto end_loop; // 當 i 等于 5 時，跳轉到 end_loop 標簽處  }  printf("%d\n", i);  
}  
end_loop: // 這是 end_loop 標簽  
printf("Loop ended early.\n");

一切皆對象

整數對象

有無符號

整數分為無符號整數和有符號整數，無符號整數只表示正數和零，而有符號整數則通過特定的編碼方式（如補碼）來表示正數、負數和零，在補碼表示法中，最高位（符號位）為0表示正數，為1表示負數。其余位則用于表示數值的大小。

整數和操作系統

位數：

在32位操作系統中，整數通常使用32位來表示，即4個字節（32個比特）。
在64位操作系統中，整數通常使用64位來表示，即8個字節（64個比特）。

范圍：

在32位操作系統中，有符號整數的范圍通常是從 -2^31 到 2^31-1，即從 -2147483648 到 2147483647；無符號整數的范圍通常是從 0 到 2^32-1，即從 0 到 4294967295。
在64位操作系統中，有符號整數的范圍通常是從 -2^63 到 2^63-1，即從 -9223372036854775808 到 9223372036854775807；無符號整數的范圍通常是從 0 到 2^64-1，即從 0 到 18446744073709551615。

整數結構體


// Include/object.h // 基礎對象，定長的
typedef struct _object {_PyObject_HEAD_EXTRA/*引用計數，用于垃圾回收*/Py_ssize_t ob_refcnt;           /*指向對象類型的指針，用于標識對象的類型，運行時類型檢查和類型特定的操作，每個對象有一個類型對象，定義了該對象的屬性、行為、方法等。PyObject 對象到底是什么類型的，只有再調用的時候，通過ob_type來判斷，即多態機制*/struct _typeobject *ob_type;    
} PyObject;// 可變長對象
typedef struct {PyObject ob_base;Py_ssize_t ob_size; /*  可變部分的項目數 */
} PyVarObject; // 定義所有可變大小容器對象的初始段。
#define PyObject_VAR_HEAD      PyVarObject ob_base;// Include/longintrepr.h
typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */struct _longobject {PyObject_VAR_HEAD// 定義了一個數組 ob_digit，其類型為 digit（即uint32_t），該數組只有一個元素digit ob_digit[1];
};// 結合上面的結構體
typedef struct {_PyObject_HEAD_EXTRAPy_ssize_t ob_refcnt; // 引用計數 8字節struct _typeobject *ob_type; // 類型 8字節Py_ssize_t ob_size; // 元素個數	8字節digit ob_digit[1]; // digit類型的數組，默認長度為1
} PyLongObject;

PyLongObject 對象中數組ob_digit 是 digit 類型的，默認長度是 1，python 中的整數就是存在這個數組中的，看下 digit 的類型


// Include/longintrepr.h
// A value of 30 is used on 64-bit systems and 15 on 32-bit systems.
// NOTE(review): that is the default selection; confirm where PYLONG_BITS_IN_DIGIT is set.
#if PYLONG_BITS_IN_DIGIT == 30
// uint32_t is an unsigned 32-bit integer type
typedef uint32_t digit;
#define PyLong_SHIFT    30
/* ... rest of this branch omitted ... */
#elif PYLONG_BITS_IN_DIGIT == 15
// unsigned short, used here as an unsigned 16-bit integer type
typedef unsigned short digit;
#define PyLong_SHIFT    15
/* ... remaining lines omitted ... */
#endif

當操作系統 64 位時，digit 的類型是無符號的 32 位整數類型，并且ob_digit 數組中每一位存儲的最大數字為 (2^30)-1 即1073741823，此處 30 是 PyLong_SHIFT 的值。如果一個數值大于1073741823，則數組長度通過PyLong_SHIFT 進行計算。
操作系統是 32 位，digit 的類型是一個16位的無符號整數類型，PyLong_SHIFT 值為 15。
看下longintrepr.h 中一段注釋

/* Long integer representation.
   The absolute value of a number is equal to
        SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
   Negative numbers are represented with ob_size < 0;
   zero is represented by ob_size == 0.
   In a normalized number, ob_digit[abs(ob_size)-1] (the most significant
   digit) is never zero.  Also, in all cases, for all valid i,
        0 <= ob_digit[i] <= MASK.
   The allocation function takes care of allocating extra memory
   so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.

   CAUTION:  Generic code manipulating subtypes of PyVarObject has to
   aware that ints abuse  ob_size's sign bit.
*/

PyLongObject 對象中 ob_size 既表示數組 ob_digit 的長度，又表示整數的符號。
ob_size如果小于零，則表示一個負數，ob_size 如果等于零，表示 0。而整個整數的值則通過表達式來計算：
`SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)`
比如數字：1234567890987654321 在ob_digit 中的存儲：
此時 ob_size => 3; ob_digit => {829168817, 76039122, 1}
根據公式反推一下ob_digit 數組的值：
第一步：

temp = 1234567890987654321
ob_digit[0] = 829168817 => temp % (2^30)
temp = 1149780946 =>  temp // (2^30)
ob_size++

第二步：

temp = 1149780946
ob_digit[1] = 76039122 => temp % (2^30)
temp = 1 =>  temp // (2^30)
ob_size++

第三步：

temp = 1
ob_digit[2] = 1 => temp % (2^30)
temp = 0 =>  temp // (2^30)
ob_size++

根據公式反算一下 829168817*2**(30*0) + 76039122*2**(30*1) + 1*2**(30*2)
用 python 來模擬查看下PyLongObject 對象

# Storage algorithm behind CPython's long integers
import math
import ctypes


class PyLong:
    """Model how an integer is split into the ob_digit array of a PyLongObject."""

    SHIFT = 30
    # NOTE: this is the base 2**SHIFT, i.e. one more than the largest value a
    # single digit can hold; it is used below as the divisor and modulus.
    MASK = (2 ** SHIFT)

    def parse_ob_size(self, longint):
        """Estimate the length of the digit array.

        The estimate is computed from the length of the decimal string of
        ``longint``, so it can be larger than the exact number of digits.

        :param longint: the integer to inspect
        :return: the estimated array length
        """
        ob_size = int(math.log(10) / math.log(self.MASK) * len(str(longint)) + 1)
        print(ob_size)
        return ob_size

    def parse_ob_digit(self, longint):
        """Split ``abs(longint)`` into base-2**SHIFT digits, least significant first.

        :param longint: the integer to split
        :return: the list of digits (empty when ``longint`` is 0)
        """
        n = abs(longint)
        ob_digit = []
        while n != 0:
            digit = n % self.MASK
            ob_digit.append(digit)
            n //= self.MASK
        print(ob_digit)  # [829168817, 76039122, 1]
        return ob_digit

    def parse_ob_digit_by_struct(self, longint):
        """Inspect the ob_digit array by reading the object's memory directly.

        :param longint: the integer whose in-memory layout is read
        :return: a ``(ob_digit, ob_size)`` tuple with the digits and their count
        """
        _ob_size = self.parse_ob_size(longint)

        class _PyLongObject(ctypes.Structure):
            # c_ssize_t wraps the C ssize_t type, a signed integer type (Py_ssize_t).
            # c_void_p wraps the generic pointer type, C's void*.
            # c_uint32 wraps an unsigned 32-bit integer, C's uint32_t.
            _fields_ = [("ob_refcnt", ctypes.c_ssize_t),
                        ("ob_type", ctypes.c_void_p),
                        ("ob_size", ctypes.c_ssize_t),
                        ("ob_digit", ctypes.c_uint32 * _ob_size)]

        # Overlay the structure on the address returned by id().
        long_object = _PyLongObject.from_address(id(longint))
        # ob_size carries the sign, so take its absolute value for the length.
        ob_size = abs(long_object.ob_size)
        ob_digit = long_object.ob_digit[:ob_size]
        print(ob_digit, ob_size)  # [829168817, 76039122, 1], 3
        return ob_digit, ob_size


if __name__ == '__main__':
    pylong = PyLong()
    data = 1234567890987654321
    pylong.parse_ob_size(data)
    pylong.parse_ob_digit(data)
    pylong.parse_ob_digit_by_struct(data)

來看下幾個特殊的數是怎么存的：
0 ob_size 如果等于零，表示 0，ob_size => 0
1 ob_size => 1; ob_digit=>{1}
-1 ob_size => -1; ob_digit=>{1}
(2 ^ 30) -1 ob_size => 1; ob_digit=>{1073741823}
-((2 ^ 30) - 1) ob_size => -1; ob_digit=>{1073741823}
(2 ^ 30) ob_size => 2; ob_digit=>{0, 1}
-(2 ^ 30) ob_size => -2; ob_digit=>{0, 1}
整數占內存大小
ob_refcnt 是 8 字節，ob_type 指針類型占 8 字節，ob_size 占 8 字節，ob_digit 每個元素是 4 字節。所以整數的大小是 8*3 + ob_size 絕對值 * 4（字節）

import sys

# 1 has ob_size 1, so it occupies 24 + 4*1 = 28 bytes
sys.getsizeof(1)  # 28
# 0 has ob_size 0, meaning ob_digit has length 0: 24 + 4*0 = 24 bytes
sys.getsizeof(0)  # 24
# (2**30)-1 has ob_size 1, so it occupies 24 + 4*1 = 28 bytes
sys.getsizeof((2**30)-1)  # 28
# 2**30 has ob_size 2, so it occupies 24 + 4*2 = 32 bytes
sys.getsizeof(2**30)  # 32

創建整數的方法
PyLong_FromLong 使用 C 的 long 類型創建 python 整數
PyLong_FromUnsignedLong 使用 C 的無符號 long 類型創建
PyLong_FromDouble 使用 C 的 double 類型創建
PyLong_FromVoidPtr 使用 C 的指針類型創建
PyLong_FromLongLong 使用 C 的 longlong 類型創建
PyLong_FromUnsignedLongLong 使用 C 的無符號 longlong 類型創建
PyLong_FromSsize_t 使用 C 的Py_ssize_t 類型創建
PyLong_FromSize_t 使用 C 的size_t 類型創建
創建整數對象
_PyLong_New

/* Allocate a new long integer object with room for `size` digits.

   Returns the new object, or NULL with an exception set when `size` exceeds
   MAX_LONG_DIGITS or the allocation fails. */
PyLongObject *
_PyLong_New(Py_ssize_t size)
{
    PyLongObject *result; /* pointer to the object being created */
    /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
       sizeof(digit)*size.  Previous incarnations of this code used
       sizeof(PyVarObject) instead of the offsetof, but this risks being
       incorrect in the presence of padding between the PyVarObject header
       and the digits. */
    if (size > (Py_ssize_t)MAX_LONG_DIGITS) {
        PyErr_SetString(PyExc_OverflowError,
                        "too many digits in integer");
        return NULL;
    }
    /* offsetof(PyLongObject, ob_digit) is the offset of the ob_digit member
       from the start of the struct, so this requests memory for the header
       plus an ob_digit array of `size` elements. */
    result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) +
                             size*sizeof(digit));
    if (!result) {
        PyErr_NoMemory();
        return NULL;
    }
    /* Initialise the object header with the type object and size given here
       (the article notes this sets ob_type, ob_size and ob_refcnt). */
    return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size);
}

整數類型

// The type object of integer objects.
PyTypeObject PyLong_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "int",                                      /* tp_name */
    offsetof(PyLongObject, ob_digit),           /* tp_basicsize */
    sizeof(digit),                              /* tp_itemsize */
    long_dealloc,                               /* tp_dealloc: destructor, clears the object once its count reaches 0 */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    long_to_decimal_string,                     /* tp_repr */
    &long_as_number,                            /* tp_as_number: numeric operations */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    (hashfunc)long_hash,                        /* tp_hash: hash function, so ints are hashable */
    0,                                          /* tp_call */
    long_to_decimal_string,                     /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_LONG_SUBCLASS,               /* tp_flags */
    long_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    long_richcompare,                           /* tp_richcompare: comparison operations */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    long_methods,                               /* tp_methods: the type's methods */
    0,                                          /* tp_members */
    long_getset,                                /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    long_new,                                   /* tp_new */
    PyObject_Del,                               /* tp_free */
};

比較操作

/* Rich comparison.
   self    the object itself
   other   the object it is compared with
   op      the comparison operation
*/
static PyObject *
long_richcompare(PyObject *self, PyObject *other, int op)
{
    int result;
    /* CHECK_BINOP is defined elsewhere; per the article it checks that self
       and other are both long integers. */
    CHECK_BINOP(self, other);
    /* Identical addresses mean the same object, so no digit comparison is needed. */
    if (self == other)
        result = 0;
    else
        result = long_compare((PyLongObject*)self, (PyLongObject*)other);
    Py_RETURN_RICHCOMPARE(result, 0, op);
}
// Comparison of two long integers.
/* Three-way comparison of two long integers.

   Returns -1 when a < b, 0 when a == b and 1 when a > b. */
static int
long_compare(PyLongObject *a, PyLongObject *b)
{
    Py_ssize_t sign;

    /* For a long integer Py_SIZE is the signed digit count kept in ob_size:
       its absolute value is the number of digits in ob_digit and its sign is
       the sign of the number, so a value and its negation have sizes of
       opposite sign. */
    if (Py_SIZE(a) != Py_SIZE(b)) {
        /* Different ob_size: the sign of the difference already tells which
           number is larger. */
        sign = Py_SIZE(a) - Py_SIZE(b);
    }
    else {
        /* Same ob_size: compare the digits one by one, starting from the end
           of the array because the most significant digit is stored last. */
        Py_ssize_t i = Py_ABS(Py_SIZE(a));
        while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i])
            ;
        /* Every digit matched, so the loop ran i down below zero. */
        if (i < 0)
            sign = 0;
        else {
            /* The first differing digit decides the order. */
            sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i];
            /* Both numbers are negative here, so the order is reversed. */
            if (Py_SIZE(a) < 0)
                sign = -sign;
        }
    }
    /* Normalise: sign < 0 means a < b, sign > 0 means a > b, sign == 0 means a == b. */
    return sign < 0 ? -1 : sign > 0 ? 1 : 0;
}

整數類型的函數集

/* Numeric slot table for the integer type; a 0 entry leaves that slot unset. */
static PyNumberMethods long_as_number = {
    (binaryfunc)long_add,       /*nb_add: addition*/
    (binaryfunc)long_sub,       /*nb_subtract: subtraction*/
    (binaryfunc)long_mul,       /*nb_multiply: multiplication*/
    long_mod,                   /*nb_remainder: remainder*/
    long_divmod,                /*nb_divmod: quotient and remainder together*/
    long_pow,                   /*nb_power: exponentiation*/
    (unaryfunc)long_neg,        /*nb_negative*/
    (unaryfunc)long_long,       /*tp_positive*/
    (unaryfunc)long_abs,        /*tp_absolute*/
    (inquiry)long_bool,         /*tp_bool*/
    (unaryfunc)long_invert,     /*nb_invert*/
    long_lshift,                /*nb_lshift*/
    (binaryfunc)long_rshift,    /*nb_rshift*/
    long_and,                   /*nb_and*/
    long_xor,                   /*nb_xor*/
    long_or,                    /*nb_or*/
    long_long,                  /*nb_int*/
    0,                          /*nb_reserved*/
    long_float,                 /*nb_float*/
    0,                          /* nb_inplace_add */
    0,                          /* nb_inplace_subtract */
    0,                          /* nb_inplace_multiply */
    0,                          /* nb_inplace_remainder */
    0,                          /* nb_inplace_power */
    0,                          /* nb_inplace_lshift */
    0,                          /* nb_inplace_rshift */
    0,                          /* nb_inplace_and */
    0,                          /* nb_inplace_xor */
    0,                          /* nb_inplace_or */
    long_div,                   /* nb_floor_divide */
    long_true_divide,           /* nb_true_divide */
    0,                          /* nb_inplace_floor_divide */
    0,                          /* nb_inplace_true_divide */
    long_long,                  /* nb_index */
};