生成訓練集、驗證集和測試集
每個split文件應該包含:
- 訓練集(id=1): 70個視頻
- 測試集(id=2): 30個視頻
- 未使用(id=0): 剩餘視頻
這是一個70/30的訓練/測試分割比例。按上述定義,標記為0的視頻被排除在當前實驗之外。但下面的腳本實際上是這樣劃分的:訓練集(id=1)、驗證集(id=2)、測試集(id=0),其中測試集和驗證集可以互換。
import os
import glob
from pathlib import Path# 設置數據處理的路徑
SPLIT_DIR = r"C:/Users/yanho/Desktop/testTrainMulti_7030_splits"  # directory containing the split files
OUTPUT_DIR = r"C:/Users/yanho/Desktop/hmdb51_annotations"  # directory the annotation lists are written to


def process_split_file(split_file, action_class):
    """Process a single split file and build its train/val/test lists.

    Every non-blank line must hold exactly two whitespace-separated fields,
    ``<video_name> <split_id>``. Split id ``1`` goes to the training list,
    ``2`` to the validation list and ``0`` to the test list; lines with any
    other id are ignored.

    Args:
        split_file: Path of the split file to read.
        action_class: Label appended after each video name (``main`` passes
            the integer class index here).

    Returns:
        A ``(train_videos, val_videos, test_videos)`` tuple of lists whose
        items are ``"<video_name> <action_class>\\n"`` strings.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    train_videos = []
    val_videos = []
    test_videos = []

    with open(split_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. an empty line at the end of the
                # file) instead of failing on the tuple unpacking below.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{split_file}:{line_no}: expected '<video_name> <split_id>', "
                    f"got {line!r}"
                )
            video_name, label = fields

            # Attach the class label to every video entry.
            video_info = f'{video_name} {action_class}\n'
            if label == '1':
                train_videos.append(video_info)
            elif label == '2':
                val_videos.append(video_info)
            elif label == '0':
                test_videos.append(video_info)

    return train_videos, val_videos, test_videos


def _action_class_from_path(split_file):
    """Return the action class encoded in a ``<class>_test_split1.txt`` path."""
    return Path(split_file).stem.split('_test_split1')[0]


def main(split_dir=None, output_dir=None):
    """Merge all ``*_test_split1.txt`` files into train/val/test list files.

    Args:
        split_dir: Directory holding the per-class split files. Defaults to
            ``SPLIT_DIR``.
        output_dir: Directory that receives ``train_split1.txt``,
            ``val_split1.txt`` and ``test_split1.txt``; created if missing.
            Defaults to ``OUTPUT_DIR``.
    """
    split_dir = SPLIT_DIR if split_dir is None else split_dir
    output_dir = Path(OUTPUT_DIR if output_dir is None else output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Accumulators for the samples of every class.
    all_train_videos = []
    all_val_videos = []
    all_test_videos = []

    # glob returns files in a filesystem-dependent order, so sort by class
    # name: the class -> index mapping is then reproducible across machines
    # and follows the alphabetical order of the class names.
    split_files = sorted(
        glob.glob(os.path.join(split_dir, '*_test_split1.txt')),
        key=_action_class_from_path,
    )

    for class_idx, split_file in enumerate(split_files):
        action_class = _action_class_from_path(split_file)
        print(f'處理類別: {action_class} (索引: {class_idx})')

        train_videos, val_videos, test_videos = process_split_file(split_file, class_idx)
        all_train_videos.extend(train_videos)
        all_val_videos.extend(val_videos)
        all_test_videos.extend(test_videos)

    # Write the training list.
    train_file = output_dir / 'train_split1.txt'
    with open(train_file, 'w') as f:
        f.writelines(all_train_videos)

    # Write the validation list.
    val_file = output_dir / 'val_split1.txt'
    with open(val_file, 'w') as f:
        f.writelines(all_val_videos)

    # Write the test list.
    test_file = output_dir / 'test_split1.txt'
    with open(test_file, 'w') as f:
        f.writelines(all_test_videos)

    print(f'\n處理完成!')
    print(f'訓練集樣本數: {len(all_train_videos)}')
    print(f'驗證集樣本數: {len(all_val_videos)}')
    print(f'測試集樣本數: {len(all_test_videos)}')
    print(f'文件保存在: {output_dir}')


if __name__ == '__main__':
    main()
合併視頻到統一目錄
import os
import shutil
from pathlib import Path


def merge_videos(src_dir, dst_dir):
    """Copy every ``.avi`` file found under ``src_dir`` into ``dst_dir``.

    Files are grouped into one sub-directory per category. The category of
    a video is the name of the parent of the directory that contains it.

    Args:
        src_dir: Root directory that is walked recursively for videos.
        dst_dir: Destination root; created (with parents) if it is missing.
    """
    Path(dst_dir).mkdir(parents=True, exist_ok=True)

    for current_dir, _subdirs, filenames in os.walk(src_dir):
        for filename in filenames:
            # Only video files are of interest.
            if not filename.endswith('.avi'):
                continue

            # Category name: the directory one level above the one being walked.
            parent_of_current = os.path.dirname(current_dir)
            category = os.path.basename(parent_of_current)

            # Make sure the per-category destination folder exists.
            category_dir = os.path.join(dst_dir, category)
            Path(category_dir).mkdir(exist_ok=True)

            src_path = os.path.join(current_dir, filename)
            dst_path = os.path.join(category_dir, filename)

            # copy2 copies the file contents together with its metadata.
            shutil.copy2(src_path, dst_path)
            print(f"Copied {src_path} -> {dst_path}")


if __name__ == "__main__":
    src_dir = "videos_src"  # source video directory
    dst_dir = "data/hmdb51/videos"  # destination directory
    merge_videos(src_dir, dst_dir)  # merge the videos into one directory; 6766 videos in total