import re
import sys
import os
import ast
from tokenize import generate_tokens, COMMENT, STRING, NL, INDENT, DEDENT
import io


# Maps each closing bracket to the opening bracket it must pair with.
_BRACKET_PAIRS = {')': '(', ']': '[', '}': '{'}

# File stem reserved for the code that remains after extraction.
_LEFTOVER_STEM = "left"

# Upper bound (in lines) above which a function is always considered long.
_ABSOLUTE_LONG_FUNCTION_LINES = 80

# A function is also long when it exceeds 1/N of the file's line count.
_RELATIVE_LONG_FUNCTION_DIVISOR = 12

# Fallback header patterns. They only match definitions that start at
# column 0 (top level), optionally preceded by single-line decorators, so the
# fallback extracts the same kind of entities as the AST path.
_CLASS_HEADER = re.compile(
    r'^(?:@[^\n]*\n)*(?P<keyword>class)[ \t]+(?P<name>\w+)[ \t]*[(:\[]',
    re.MULTILINE,
)
_FUNC_HEADER = re.compile(
    r'^(?:@[^\n]*\n)*(?P<keyword>(?:async[ \t]+)?def)[ \t]+(?P<name>\w+)'
    r'[ \t]*[(\[]',
    re.MULTILINE,
)


def _count_lines(content):
    """Return the number of lines in *content* (0 for an empty string)."""
    if not content:
        return 0
    # A trailing newline terminates the last line; it does not start a new one.
    return content.count('\n') + (0 if content.endswith('\n') else 1)


def _long_function_threshold(total_lines):
    """Return the line count a function must exceed to be extracted.

    A function is long when it exceeds 80 lines OR 1/12 of the file length;
    satisfying either condition is enough, hence the minimum of the two.
    """
    return min(
        _ABSOLUTE_LONG_FUNCTION_LINES,
        total_lines // _RELATIVE_LONG_FUNCTION_DIVISOR,
    )


def _extract_with_ast(content, long_function_threshold):
    """Collect top-level classes and long functions using the AST.

    Raises whatever ``ast.parse`` raises when *content* cannot be parsed.
    """
    tree = ast.parse(content)

    # line_offsets[k] is the character index where (1-based) line k + 1 starts.
    line_offsets = [0]
    for line in content.split('\n'):
        line_offsets.append(line_offsets[-1] + len(line) + 1)

    entities = []
    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            kind = 'class'
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.end_lineno - node.lineno + 1 <= long_function_threshold:
                continue
            kind = 'function'
        else:
            continue
        # node.lineno points at the def/class line; decorators sit above it
        # and must move together with the definition.
        first_line = min(
            [node.lineno] + [deco.lineno for deco in node.decorator_list]
        )
        start_index = line_offsets[first_line - 1]
        # Stop just before the newline that terminates the last line.
        end_index = line_offsets[node.end_lineno] - 1
        entities.append(
            (kind, node.name, start_index, end_index,
             content[start_index:end_index])
        )
    return entities


def extract_entities(filename):
    """Read *filename* and locate the classes and long functions to split out.

    Returns a tuple ``(entities, content, total_lines)`` where each entity is
    ``(kind, name, start_index, end_index, text)``; ``kind`` is ``'class'`` or
    ``'function'`` and the indices are character offsets into ``content``.
    Every top-level class is returned; a top-level function is returned only
    when its line count exceeds the long-function threshold.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        content = f.read()

    total_lines = _count_lines(content)
    long_function_threshold = _long_function_threshold(total_lines)

    try:
        entities = _extract_with_ast(content, long_function_threshold)
    except (SyntaxError, ValueError):
        # The source does not parse; fall back to the indentation heuristic.
        return extract_entities_with_indent(
            content, total_lines, long_function_threshold
        )
    return entities, content, total_lines


def extract_entities_with_indent(content, total_lines, long_function_threshold):
    """Locate entities with regexes and indentation when AST parsing fails.

    Only definitions starting at column 0 are considered. Returns the same
    ``(entities, content, total_lines)`` tuple as ``extract_entities``, with
    the entities sorted by start offset.
    """
    entities = []
    class_ranges = []

    for match in _CLASS_HEADER.finditer(content):
        start_index = match.start()
        # Scan from the keyword, not from the decorators: the line after a
        # decorator is not indented and would end the block immediately.
        block_end = find_block_end(content, match.start('keyword'), '')
        if block_end == -1:
            continue
        text = content[start_index:block_end].rstrip()
        end_index = start_index + len(text)
        entities.append(
            ('class', match.group('name'), start_index, end_index, text)
        )
        class_ranges.append((start_index, block_end))

    for match in _FUNC_HEADER.finditer(content):
        start_index = match.start()
        # Skip anything that lies inside an already extracted class.
        if any(c_start <= start_index < c_end
               for c_start, c_end in class_ranges):
            continue
        keyword_index = match.start('keyword')
        block_end = find_block_end(content, keyword_index, '')
        if block_end == -1:
            continue
        text = content[start_index:block_end].rstrip()
        end_index = start_index + len(text)
        # Count from the def line so decorators and trailing blank lines do
        # not inflate the function length.
        func_lines = content[keyword_index:end_index].count('\n') + 1
        if func_lines > long_function_threshold:
            entities.append(
                ('function', match.group('name'), start_index, end_index, text)
            )

    entities.sort(key=lambda entity: entity[2])
    return entities, content, total_lines


def _skip_string(content, quote_index):
    """Return the index just past the string literal opening at *quote_index*.

    Handles triple-quoted literals and backslash escapes. An unterminated
    single-line literal ends at its newline (the newline index is returned so
    the caller still sees the line break).
    """
    length = len(content)
    quote = content[quote_index]
    triple = quote * 3

    if content.startswith(triple, quote_index):
        pos = quote_index + 3
        while pos < length:
            if content[pos] == '\\':
                pos += 2
            elif content.startswith(triple, pos):
                return pos + 3
            else:
                pos += 1
        return length

    pos = quote_index + 1
    while pos < length:
        char = content[pos]
        if char == '\\':
            pos += 2
        elif char == quote:
            return pos + 1
        elif char == '\n':
            return pos
        else:
            pos += 1
    return length


def find_block_end(content, start_index, base_indent):
    """Find where the indented block starting at *start_index* ends.

    *base_indent* is the leading whitespace of the block's header line. The
    block ends at the start of the first following line that is non-blank,
    not a comment, indented no deeper than the header, and not inside open
    brackets. String literals and ``#`` comments are skipped so quotes and
    brackets inside them are ignored.

    Returns the character index of the end of the block, ``len(content)``
    when the block runs to the end of the text, or ``-1`` when a closing
    bracket has no matching opener.
    """
    base_level = len(base_indent) if base_indent else 0
    length = len(content)
    open_brackets = []
    pos = start_index

    while pos < length:
        char = content[pos]

        if char == '#':
            # Skip the comment; the newline itself is handled next iteration.
            newline = content.find('\n', pos)
            if newline == -1:
                return length
            pos = newline
            continue

        if char in '"\'':
            pos = _skip_string(content, pos)
            continue

        if char in '([{':
            open_brackets.append(char)
        elif char in _BRACKET_PAIRS:
            if not open_brackets or open_brackets.pop() != _BRACKET_PAIRS[char]:
                return -1
        elif char == '\n' and not open_brackets:
            next_start = pos + 1
            if next_start >= length:
                return next_start
            next_end = content.find('\n', next_start)
            if next_end == -1:
                next_end = length
            next_line = content[next_start:next_end]
            stripped = next_line.strip()
            # Blank lines and comment-only lines never terminate a block.
            if stripped and not stripped.startswith('#'):
                indent_level = len(next_line) - len(next_line.lstrip())
                if indent_level <= base_level:
                    return next_start

        pos += 1

    return length


def _unique_stem(name, used_stems):
    """Return a file stem based on *name* that is not yet in *used_stems*.

    Comparison is case-insensitive so the result is also unique on
    case-insensitive file systems. The chosen stem is recorded in
    *used_stems*.
    """
    stem = name
    suffix = 2
    while stem.casefold() in used_stems:
        stem = f"{name}_{suffix}"
        suffix += 1
    used_stems.add(stem.casefold())
    return stem


def write_entities(entities, content, filename):
    """Write every entity to its own file and the rest to ``left.py``.

    Files are created in ``<basename of filename>_split`` under the current
    working directory. Each entity goes to ``<name>.py``; when a name repeats
    or collides with ``left``, a numeric suffix (``_2``, ``_3``, ...) is
    appended so no extracted code is overwritten.

    Returns the output directory path.
    """
    base_name = os.path.splitext(os.path.basename(filename))[0]
    output_dir = f"{base_name}_split"
    os.makedirs(output_dir, exist_ok=True)

    used_stems = {_LEFTOVER_STEM.casefold()}
    covered_ranges = []
    for _kind, e_name, start, end, e_content in entities:
        stem = _unique_stem(e_name, used_stems)
        output_path = os.path.join(output_dir, f"{stem}.py")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(e_content)
        covered_ranges.append((start, end))

    covered_ranges.sort()
    remaining_parts = []
    last_pos = 0
    for start, end in covered_ranges:
        if start > last_pos:
            remaining_parts.append(content[last_pos:start])
        # Never move backwards: overlapping ranges must not duplicate text.
        last_pos = max(last_pos, end)
    remaining_parts.append(content[last_pos:])

    left_path = os.path.join(output_dir, f"{_LEFTOVER_STEM}.py")
    with open(left_path, 'w', encoding='utf-8') as f:
        f.write(''.join(remaining_parts))

    return output_dir


def main():
    """Command-line entry point: split the file named in ``sys.argv[1]``."""
    if len(sys.argv) != 2:
        print("Usage: python split_code.py <source_file.py>", file=sys.stderr)
        sys.exit(1)

    filename = sys.argv[1]
    if not os.path.isfile(filename):
        print(f"Error: File not found - {filename}", file=sys.stderr)
        sys.exit(1)

    try:
        entities, content, total_lines = extract_entities(filename)
        output_dir = write_entities(entities, content, filename)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and exit.
        print(f"Error processing file: {str(e)}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)

    class_count = sum(1 for e in entities if e[0] == 'class')
    func_count = sum(1 for e in entities if e[0] == 'function')
    print(f"Processed {filename}:")
    print(f"- Total lines: {total_lines}")
    print(f"- Long function threshold: "
          f"{_long_function_threshold(total_lines)} lines")
    print(f"- Extracted: {class_count} classes, {func_count} long functions")
    print(f"- Output directory: {output_dir}")
    print(f"- Remaining code in: {os.path.join(output_dir, 'left.py')}")


if __name__ == "__main__":
    main()
需求:
寫一個 Python 腳本,讀取 Python 源代碼:(i) 把源文件中的各個 class 切分到單獨的文件中,文件名是類名;(ii) 把長函數(代碼行數超過本文件長度的 1/12,或者超過 80 行,兩個條件滿足一個即可)切分到單獨的文件中,文件名是函數名;(iii) 剩余部分放在 left.py 文件中。謝謝!