The goal of this competition is to detect external contact experienced by players during NFL games. You use video and player tracking data to identify the moments when contact occurs, to help improve player safety. Two kinds of contact are labeled: player-to-player and player-to-ground (foot-to-ground contact is excluded). It is run by the same host as a competition I worked on before:
Kaggle video tracking: NFL Health & Safety - Helmet Assignment | CSDN blog
That earlier competition was video tracking, done with DeepSORT; this one uses a 2.5D CNN.
EDA
For EDA, you can refer to the notebook below; it uses fasteda, which is quite convenient:
NFL Player Contact Detection EDA 🏈 | Kaggle
The video data lives in the test and train folders. A train_baseline_helmets.csv is also provided, generated by the winning solution of the previous competition (the video tracking task I did before). train_player_tracking.csv is sampled at 10 Hz while the videos run at 59.94 fps, so a conversion is needed later. The snap event, i.e. the start of the play, happens at the 5-second mark of each video.
train_labels.csv
step: A number representing each timestep for each play, starting at 0 at the moment the play begins and incrementing by 1 every 0.1 seconds. (As mentioned above, the play starts at the 5-second mark of the video, and one step is 0.1 s.)
Contacts are recorded at 10 Hz.
[train/test]_player_tracking.csv
datetime: timestamp at 10 Hz.
[train/test]_video_metadata.csv
Contains timestamps that can be used to sync with the player tracking data; the metadata is synchronized with the video.
Training
I rented a GPU and trained for 20+ hours (10 epochs). I uploaded the weights to Kaggle; the link is below:
track_weight | Kaggle
An extra dataset is also needed (below). I ran on a 4090 GPU with 20 CPU cores; if you train it yourself, adjust the config accordingly:
timm-0.6.9 | Kaggle
Imports
import os
import sys
import glob
import numpy as np
import pandas as pd
import random
import math
import gc
import cv2
from tqdm import tqdm
import time
from functools import lru_cache
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
sys.path.append('../input/timm-0-6-9/pytorch-image-models-master')  # make the offline timm package importable before importing timm
import timm
from timm.scheduler import CosineLRScheduler
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
Configuration
CFG = {
    'seed': 42,
    'model': 'convnext_small.fb_in1k',
    'img_size': 256,
    'epochs': 10,
    'train_bs': 48,
    'valid_bs': 32,
    'lr': 1e-3,
    'weight_decay': 1e-6,
    'num_workers': 20,
    'max_grad_norm': 1000,
    'epochs_warmup': 3.0,
}
I use ConvNeXt, a CNN that was iteratively redesigned by borrowing ideas from ViT-style models; read the paper if you are interested, though it is essentially one long ablation study.
Set the seed and device
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['seed'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Add some extra columns and read the data
def expand_contact_id(df):
    """Split contact_id into separate columns."""
    df["game_play"] = df["contact_id"].str[:12]
    df["step"] = df["contact_id"].str.split("_").str[-3].astype("int")
    df["nfl_player_id_1"] = df["contact_id"].str.split("_").str[-2]
    df["nfl_player_id_2"] = df["contact_id"].str.split("_").str[-1]
    return df
labels = expand_contact_id(pd.read_csv("../input/nfl-player-contact-detection/train_labels.csv"))
train_tracking = pd.read_csv("../input/nfl-player-contact-detection/train_player_tracking.csv")
train_helmets = pd.read_csv("../input/nfl-player-contact-detection/train_baseline_helmets.csv")
train_video_metadata = pd.read_csv("../input/nfl-player-contact-detection/train_video_metadata.csv")
Convert the videos into frame images
import subprocess
from tqdm import tqdm

# train_helmets is a DataFrame containing the video file names
for video in tqdm(train_helmets.video.unique()):
    if 'Endzone2' not in video:
        input_path = f'/openbayes/home/train/{video}'               # input video path
        output_path = f'/openbayes/train/frames/{video}_%04d.jpg'   # output frame pattern
        # build the ffmpeg command
        command = [
            'ffmpeg',
            '-i', input_path,      # input video file
            '-q:v', '5',           # output image quality
            '-f', 'image2',        # output an image sequence
            output_path,           # output image path
            '-hide_banner',        # hide the ffmpeg banner
            '-loglevel', 'error',  # only log errors
        ]
        subprocess.run(command, check=True)
You can tweak the quality setting there. This cannot be trained on Kaggle; you need to rent your own GPU to run it.
Create some features
def create_features(df, tr_tracking, merge_col="step", use_cols=["x_position", "y_position"]):
    output_cols = []
    df_combo = (
        df.astype({"nfl_player_id_1": "str"})
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_1"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .rename(columns={c: c + "_1" for c in use_cols})
        .drop("nfl_player_id", axis=1)
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_2"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .drop("nfl_player_id", axis=1)
        .rename(columns={c: c + "_2" for c in use_cols})
        .sort_values(["game_play", merge_col, "nfl_player_id_1", "nfl_player_id_2"])
        .reset_index(drop=True)
    )
    output_cols += [c + "_1" for c in use_cols]
    output_cols += [c + "_2" for c in use_cols]

    if ("x_position" in use_cols) & ("y_position" in use_cols):
        index = df_combo['x_position_2'].notnull()
        distance_arr = np.full(len(index), np.nan)
        tmp_distance_arr = np.sqrt(
            np.square(df_combo.loc[index, "x_position_1"] - df_combo.loc[index, "x_position_2"])
            + np.square(df_combo.loc[index, "y_position_1"] - df_combo.loc[index, "y_position_2"])
        )
        distance_arr[index] = tmp_distance_arr
        df_combo['distance'] = distance_arr
        output_cols += ["distance"]

    df_combo['G_flug'] = (df_combo['nfl_player_id_2'] == "G")
    output_cols += ["G_flug"]
    return df_combo, output_cols

use_cols = [
    'x_position', 'y_position', 'speed', 'distance',
    'direction', 'orientation', 'acceleration', 'sa'
]

train, feature_cols = create_features(labels, train_tracking, use_cols=use_cols)
This merges the labels with train_tracking; the feature_cols returned here are used later during training.
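As a quick check, feature_cols should contain the 8 tracking columns for each of the two players, plus distance and G_flug — 18 in total:

print(len(feature_cols))  # 18
print(feature_cols)
# ['x_position_1', 'y_position_1', ..., 'sa_1',
#  'x_position_2', 'y_position_2', ..., 'sa_2', 'distance', 'G_flug']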
Sync with the video frame rate and filter out part of the data
train_filtered = train.query('not distance>2').reset_index(drop=True)
train_filtered['frame'] = (train_filtered['step']/10*59.94+5*59.94).astype('int')+1
train_filtered.head()
The video runs at 59.94 fps while the tracking data is at 10 Hz, hence the conversion above; player pairs whose distance is greater than 2 are also dropped here.
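As a sanity check, the step-to-frame mapping can be wrapped in a small helper (a sketch; step_to_frame is my name, not from the original code):

# Map a 10 Hz tracking step to a 59.94 fps video frame.
# The snap happens 5 seconds into the video, so step 0 lands around frame 300.
def step_to_frame(step, fps=59.94, snap_offset_s=5.0, hz=10):
    return int(step / hz * fps + snap_offset_s * fps) + 1

print(step_to_frame(0))   # 300
print(step_to_frame(10))  # 360, one second later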
Data augmentation
train_aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

valid_aug = A.Compose([
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])
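A quick shape check on random data (a sketch, not from the original notebook): albumentations treats the last axis as channels, and ToTensorV2 moves channels first, so the 26-frame stack goes in as HWC and comes out CHW.

dummy = np.random.randint(0, 255, (256, 256, 26)).astype(np.float32)
out = train_aug(image=dummy)["image"]
print(type(out), out.shape)  # <class 'torch.Tensor'> torch.Size([26, 256, 256])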
Build lookup dictionaries
video2helmets = {}
train_helmets_new = train_helmets.set_index('video')
for video in tqdm(train_helmets.video.unique()):
    video2helmets[video] = train_helmets_new.loc[video].reset_index(drop=True)
video2frames = {}

for game_play in tqdm(train_video_metadata.game_play.unique()):
    for view in ['Endzone', 'Sideline']:
        video = game_play + f'_{view}.mp4'
        video2frames[video] = max(list(map(lambda x: int(x.split('_')[-1].split('.')[0]),
                                           glob.glob(f'../train/frames/{video}*'))))
This pulls out the helmet detections for each video and the maximum frame number of each video. The detections are used later to crop the images, and the maximum frame number ensures the sampled frames stay within range.
Dataset
class MyDataset(Dataset):
    def __init__(self, df, aug=train_aug, mode='train'):
        self.df = df
        self.frame = df.frame.values
        self.feature = df[feature_cols].fillna(-1).values
        self.players = df[['nfl_player_id_1', 'nfl_player_id_2']].values
        self.game_play = df.game_play.values
        self.aug = aug
        self.mode = mode

    def __len__(self):
        return len(self.df)

    # @lru_cache(1024)
    # def read_img(self, path):
    #     return cv2.imread(path, 0)

    def __getitem__(self, idx):
        window = 24
        frame = self.frame[idx]

        if self.mode == 'train':
            frame = frame + random.randint(-6, 6)

        players = []
        for p in self.players[idx]:
            if p == 'G':
                players.append(p)
            else:
                players.append(int(p))

        imgs = []
        for view in ['Endzone', 'Sideline']:
            video = self.game_play[idx] + f'_{view}.mp4'
            tmp = video2helmets[video]
            # tmp = tmp.query('@frame-@window<=frame<=@frame+@window')
            tmp = tmp[tmp['frame'].between(frame-window, frame+window)]  # keep detections near the target frame
            tmp = tmp[tmp.nfl_player_id.isin(players)]  # .sort_values(['nfl_player_id', 'frame'])
            tmp_frames = tmp.frame.values
            tmp = tmp.groupby('frame')[['left', 'width', 'top', 'height']].mean()

            # ~0.002 s: one bbox per frame, interpolating the missing ones
            bboxes = []
            for f in range(frame-window, frame+window+1, 1):
                if f in tmp_frames:
                    x, w, y, h = tmp.loc[f][['left', 'width', 'top', 'height']]
                    bboxes.append([x, w, y, h])
                else:
                    bboxes.append([np.nan, np.nan, np.nan, np.nan])
            bboxes = pd.DataFrame(bboxes).interpolate(limit_direction='both').values
            bboxes = bboxes[::4]
            if bboxes.sum() > 0:
                flag = 1
            else:
                flag = 0

            # ~0.03 s: crop a 256x256 patch around the helmet every 4th frame
            for i, f in enumerate(range(frame-window, frame+window+1, 4)):
                img_new = np.zeros((256, 256), dtype=np.float32)
                if flag == 1 and f <= video2frames[video]:
                    img = cv2.imread(f'../train/frames/{video}_{f:04d}.jpg', 0)
                    x, w, y, h = bboxes[i]
                    img = img[int(y+h/2)-128:int(y+h/2)+128,
                              int(x+w/2)-128:int(x+w/2)+128].copy()
                    img_new[:img.shape[0], :img.shape[1]] = img
                imgs.append(img_new)

        # ~0.06 s
        feature = np.float32(self.feature[idx])
        img = np.array(imgs).transpose(1, 2, 0)
        img = self.aug(image=img)["image"]
        label = np.float32(self.df.contact.values[idx])
        return img, feature, label
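A quick smoke test of the dataset, assuming the frames have already been extracted to ../train/frames (a sketch):

ds = MyDataset(train_filtered, train_aug, 'train')
img, feature, label = ds[0]
print(img.shape, feature.shape, label)  # torch.Size([26, 256, 256]) (18,) 0.0 or 1.0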
Model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.backbone = timm.create_model(CFG['model'], pretrained=True,
                                          num_classes=500, in_chans=13)
        self.mlp = nn.Sequential(
            nn.Linear(18, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.fc = nn.Linear(64 + 500*2, 1)

    def forward(self, img, feature):
        b, c, h, w = img.shape
        img = img.reshape(b*2, c//2, h, w)       # (b, 26, h, w) -> (b*2, 13, h, w): one sample per view
        img = self.backbone(img).reshape(b, -1)  # (b*2, 500) -> (b, 1000)
        feature = self.mlp(feature)
        y = self.fc(torch.cat([img, feature], dim=1))
        return y
Here len(feature_cols) is 18, so the MLP input size is 18. In the dataset code above,
for i, f in enumerate(range(frame-window, frame+window+1, 4)):
    img_new = np.zeros((256, 256), dtype=np.float32)
    if flag == 1 and f <= video2frames[video]:
        img = cv2.imread(f'/openbayes/train/frames/{video}_{f:04d}.jpg', 0)
        x, w, y, h = bboxes[i]
        img = img[int(y+h/2)-128:int(y+h/2)+128,
                  int(x+w/2)-128:int(x+w/2)+128].copy()
        img_new[:img.shape[0], :img.shape[1]] = img
    imgs.append(img_new)
frames are subsampled: 13 per view, and with two views that is 26 in total, so the model input has 26 channels. As in the previous competition, two camera views are provided:
for view in ['Endzone', 'Sideline']:
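To make the channel bookkeeping concrete, here is a quick shape trace through the model with random tensors (a sketch; batch size 2 is arbitrary, and building Model() needs the timm weights available):

m = Model()
img = torch.randn(2, 26, 256, 256)   # 13 frames x 2 views stacked as channels
feat = torch.randn(2, 18)            # len(feature_cols) == 18
# inside forward: (2, 26, 256, 256) -> (4, 13, 256, 256) -> backbone -> (4, 500) -> (2, 1000)
out = m(img, feat)
print(out.shape)                     # torch.Size([2, 1])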
Loss function
model = Model()
model.to(device)
model.train()
import torch.nn as nn
criterion = nn.BCEWithLogitsLoss()
This is binary cross-entropy; BCEWithLogitsLoss applies the sigmoid internally.
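BCEWithLogitsLoss fuses the sigmoid and the BCE term into one numerically stable op, which a quick check confirms:

logits = torch.tensor([0.5, -1.0])
targets = torch.tensor([1.0, 0.0])
loss_a = nn.BCEWithLogitsLoss()(logits, targets)
loss_b = F.binary_cross_entropy(torch.sigmoid(logits), targets)
print(loss_a.item(), loss_b.item())  # equal up to floating-point error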
Evaluation
def evaluate(model, loader_val, *, compute_score=True, pbar=None):
    """Predict and compute loss and score."""
    tb = time.time()
    in_training = model.training
    model.eval()

    loss_sum = 0.0
    n_sum = 0
    y_all = []
    y_pred_all = []

    if pbar is not None:
        pbar = tqdm(desc='Predict', nrows=78, total=pbar)

    total = len(loader_val)
    for ibatch, (img, feature, label) in tqdm(enumerate(loader_val), total=total):
        # img, feature, label = [x.to(device) for x in batch]
        img = img.to(device)
        feature = feature.to(device)
        n = label.size(0)
        label = label.to(device)

        with torch.no_grad():
            y_pred = model(img, feature)
        loss = criterion(y_pred.view(-1), label)

        n_sum += n
        loss_sum += n * loss.item()

        if pbar is not None:
            pbar.update(len(img))

        del loss, img, label
        gc.collect()

    loss_val = loss_sum / n_sum

    ret = {
        'loss': loss_val,
        'time': time.time() - tb,
    }

    model.train(in_training)
    gc.collect()
    return ret
Load the data, set up the learning-rate schedule and optimizer
train_set, valid_set = train_test_split(train_filtered, test_size=0.05,
                                        random_state=42, stratify=train_filtered['contact'])
train_set = MyDataset(train_set, train_aug, 'train')
train_loader = DataLoader(train_set, batch_size=CFG['train_bs'], shuffle=True, num_workers=12, pin_memory=True,drop_last=True)
valid_set = MyDataset(valid_set, valid_aug, 'test')
valid_loader = DataLoader(valid_set, batch_size=CFG['valid_bs'], shuffle=False, num_workers=12, pin_memory=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
nbatch = len(train_loader)
warmup = CFG['epochs_warmup'] * nbatch
nsteps = CFG['epochs'] * nbatch
scheduler = CosineLRScheduler(optimizer,
                              warmup_t=warmup, warmup_lr_init=0.0, warmup_prefix=True,
                              t_initial=(nsteps - warmup), lr_min=1e-6)
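The scheduler is stepped once per batch, so the warmup spans the first 3 epochs of updates. Roughly, the schedule it produces looks like this (a hand-written approximation of the cosine-with-warmup curve, not timm's exact code):

# Linear warmup from 0 over `warmup` steps, then one cosine decay to lr_min.
def lr_at(t, base_lr=CFG['lr'], lr_min=1e-6):
    if t < warmup:
        return base_lr * t / warmup
    t = t - warmup  # warmup_prefix=True: the cosine phase starts after warmup
    return lr_min + 0.5 * (base_lr - lr_min) * (1 + math.cos(math.pi * t / (nsteps - warmup)))

print(lr_at(0), lr_at(warmup), lr_at(nsteps - 1))  # 0 -> peak lr -> ~lr_min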
Start training; the whole model object is saved here
tb = time.time()
time_val = 0.0
best_loss = np.inf

for iepoch in range(CFG['epochs']):
    print('Epoch:', iepoch+1)
    loss_sum = 0.0
    n_sum = 0
    total = len(train_loader)

    # Train
    for ibatch, (img, feature, label) in tqdm(enumerate(train_loader), total=total):
        img = img.to(device)
        feature = feature.to(device)
        n = label.size(0)
        label = label.to(device)

        optimizer.zero_grad()
        y_pred = model(img, feature).squeeze(-1)
        loss = criterion(y_pred, label)
        loss_train = loss.item()
        loss_sum += n * loss_train
        n_sum += n

        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG['max_grad_norm'])
        optimizer.step()
        scheduler.step(iepoch * nbatch + ibatch + 1)

    # Validate once per epoch
    val = evaluate(model, valid_loader)
    time_val += val['time']

    loss_train = loss_sum / n_sum
    dt = (time.time() - tb) / 60
    print('Epoch: %d Train Loss: %.4f Test Loss: %.4f Time: %.2f min' %
          (iepoch + 1, loss_train, val['loss'], dt))

    if val['loss'] < best_loss:
        best_loss = val['loss']
        # Save the whole model object
        ofilename = '/openbayes/home/best_model.pt'
        torch.save(model, ofilename)
        print(ofilename, 'written')

    del val
    gc.collect()

dt = time.time() - tb
print(' %.2f min total, %.2f min val' % (dt / 60, time_val / 60))
gc.collect()
Saving only the weights can run into bugs (the class definition must be available at load time), so saving the whole model is the safer option.
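For reference, the weights-only alternative looks like this (a sketch; it requires the Model class definition to be importable wherever you load it, which is exactly the pitfall mentioned above):

# Save only the weights (more portable across file moves and refactors)...
torch.save(model.state_dict(), 'best_model_state.pt')

# ...but loading then needs the class definition in scope.
model2 = Model()
model2.load_state_dict(torch.load('best_model_state.pt', map_location=device))
model2.to(device).eval()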
Inference
Here I use the TTA version.
Imports
import os
import sys
sys.path.append('/kaggle/input/timm-0-6-9/pytorch-image-models-master')
import glob
import numpy as np
import pandas as pd
import random
import math
import gc
import cv2
from tqdm import tqdm
import time
from functools import lru_cache
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef
Data processing
This is basically the same as before; I've put it all in one place.
CFG = {
    'seed': 42,
    'model': 'convnext_small.fb_in1k',
    'img_size': 256,
    'epochs': 10,
    'train_bs': 100,
    'valid_bs': 64,
    'lr': 1e-3,
    'weight_decay': 1e-6,
    'num_workers': 4,
}
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['seed'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def expand_contact_id(df):
    """Split contact_id into separate columns."""
    df["game_play"] = df["contact_id"].str[:12]
    df["step"] = df["contact_id"].str.split("_").str[-3].astype("int")
    df["nfl_player_id_1"] = df["contact_id"].str.split("_").str[-2]
    df["nfl_player_id_2"] = df["contact_id"].str.split("_").str[-1]
    return df

labels = expand_contact_id(pd.read_csv("/kaggle/input/nfl-player-contact-detection/sample_submission.csv"))

test_tracking = pd.read_csv("/kaggle/input/nfl-player-contact-detection/test_player_tracking.csv")
test_helmets = pd.read_csv("/kaggle/input/nfl-player-contact-detection/test_baseline_helmets.csv")
test_video_metadata = pd.read_csv("/kaggle/input/nfl-player-contact-detection/test_video_metadata.csv")
!mkdir -p ../work/frames

for video in tqdm(test_helmets.video.unique()):
    if 'Endzone2' not in video:
        !ffmpeg -i /kaggle/input/nfl-player-contact-detection/test/{video} -q:v 2 -f image2 /kaggle/work/frames/{video}_%04d.jpg -hide_banner -loglevel error
def create_features(df, tr_tracking, merge_col="step", use_cols=["x_position", "y_position"]):
    output_cols = []
    df_combo = (
        df.astype({"nfl_player_id_1": "str"})
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_1"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .rename(columns={c: c + "_1" for c in use_cols})
        .drop("nfl_player_id", axis=1)
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_2"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .drop("nfl_player_id", axis=1)
        .rename(columns={c: c + "_2" for c in use_cols})
        .sort_values(["game_play", merge_col, "nfl_player_id_1", "nfl_player_id_2"])
        .reset_index(drop=True)
    )
    output_cols += [c + "_1" for c in use_cols]
    output_cols += [c + "_2" for c in use_cols]

    if ("x_position" in use_cols) & ("y_position" in use_cols):
        index = df_combo['x_position_2'].notnull()
        distance_arr = np.full(len(index), np.nan)
        tmp_distance_arr = np.sqrt(
            np.square(df_combo.loc[index, "x_position_1"] - df_combo.loc[index, "x_position_2"])
            + np.square(df_combo.loc[index, "y_position_1"] - df_combo.loc[index, "y_position_2"])
        )
        distance_arr[index] = tmp_distance_arr
        df_combo['distance'] = distance_arr
        output_cols += ["distance"]

    df_combo['G_flug'] = (df_combo['nfl_player_id_2'] == "G")
    output_cols += ["G_flug"]
    return df_combo, output_cols

use_cols = [
    'x_position', 'y_position', 'speed', 'distance',
    'direction', 'orientation', 'acceleration', 'sa'
]

test, feature_cols = create_features(labels, test_tracking, use_cols=use_cols)
test
test_filtered = test.query('not distance>2').reset_index(drop=True)
test_filtered['frame'] = (test_filtered['step']/10*59.94+5*59.94).astype('int')+1
test_filtered
del test, labels, test_tracking
gc.collect()
train_aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

valid_aug = A.Compose([
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])
video2helmets = {}
test_helmets_new = test_helmets.set_index('video')
for video in tqdm(test_helmets.video.unique()):
    video2helmets[video] = test_helmets_new.loc[video].reset_index(drop=True)

del test_helmets, test_helmets_new
gc.collect()
video2frames = {}

for game_play in tqdm(test_video_metadata.game_play.unique()):
    for view in ['Endzone', 'Sideline']:
        video = game_play + f'_{view}.mp4'
        video2frames[video] = max(list(map(lambda x: int(x.split('_')[-1].split('.')[0]),
                                           glob.glob(f'/kaggle/work/frames/{video}*'))))
class MyDataset(Dataset):
    def __init__(self, df, aug=valid_aug, mode='train'):
        self.df = df
        self.frame = df.frame.values
        self.feature = df[feature_cols].fillna(-1).values
        self.players = df[['nfl_player_id_1', 'nfl_player_id_2']].values
        self.game_play = df.game_play.values
        self.aug = aug
        self.mode = mode

    def __len__(self):
        return len(self.df)

    # @lru_cache(1024)
    # def read_img(self, path):
    #     return cv2.imread(path, 0)

    def __getitem__(self, idx):
        window = 24
        frame = self.frame[idx]

        if self.mode == 'train':
            frame = frame + random.randint(-6, 6)

        players = []
        for p in self.players[idx]:
            if p == 'G':
                players.append(p)
            else:
                players.append(int(p))

        imgs = []
        for view in ['Endzone', 'Sideline']:
            video = self.game_play[idx] + f'_{view}.mp4'
            tmp = video2helmets[video]
            # tmp = tmp.query('@frame-@window<=frame<=@frame+@window')
            tmp = tmp[tmp['frame'].between(frame-window, frame+window)]  # keep detections near the target frame
            tmp = tmp[tmp.nfl_player_id.isin(players)]  # .sort_values(['nfl_player_id', 'frame'])
            tmp_frames = tmp.frame.values
            tmp = tmp.groupby('frame')[['left', 'width', 'top', 'height']].mean()

            # ~0.002 s: one bbox per frame, interpolating the missing ones
            bboxes = []
            for f in range(frame-window, frame+window+1, 1):
                if f in tmp_frames:
                    x, w, y, h = tmp.loc[f][['left', 'width', 'top', 'height']]
                    bboxes.append([x, w, y, h])
                else:
                    bboxes.append([np.nan, np.nan, np.nan, np.nan])
            bboxes = pd.DataFrame(bboxes).interpolate(limit_direction='both').values
            bboxes = bboxes[::4]
            if bboxes.sum() > 0:
                flag = 1
            else:
                flag = 0

            # ~0.03 s: crop a 256x256 patch around the helmet every 4th frame
            for i, f in enumerate(range(frame-window, frame+window+1, 4)):
                img_new = np.zeros((256, 256), dtype=np.float32)
                if flag == 1 and f <= video2frames[video]:
                    img = cv2.imread(f'/kaggle/work/frames/{video}_{f:04d}.jpg', 0)
                    x, w, y, h = bboxes[i]
                    img = img[int(y+h/2)-128:int(y+h/2)+128,
                              int(x+w/2)-128:int(x+w/2)+128].copy()
                    img_new[:img.shape[0], :img.shape[1]] = img
                imgs.append(img_new)

        # ~0.06 s
        feature = np.float32(self.feature[idx])
        img = np.array(imgs).transpose(1, 2, 0)
        img = self.aug(image=img)["image"]
        label = np.float32(self.df.contact.values[idx])
        return img, feature, label
Inspect the cropped images
img, feature, label = MyDataset(test_filtered, valid_aug, 'test')[0]
plt.imshow(img.permute(1,2,0)[:,:,7])
plt.show()
img.shape, feature, label
Run inference
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.backbone = timm.create_model(CFG['model'], pretrained=False,
                                          num_classes=500, in_chans=13)
        self.mlp = nn.Sequential(
            nn.Linear(18, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            # nn.Linear(64, 64),
            # nn.LayerNorm(64),
            # nn.ReLU(),
            # nn.Dropout(0.2)
        )
        self.fc = nn.Linear(64 + 500*2, 1)

    def forward(self, img, feature):
        b, c, h, w = img.shape
        img = img.reshape(b*2, c//2, h, w)
        img = self.backbone(img).reshape(b, -1)
        feature = self.mlp(feature)
        y = self.fc(torch.cat([img, feature], dim=1))
        return y
test_set = MyDataset(test_filtered, valid_aug, 'test')
test_loader = DataLoader(test_set, batch_size=CFG['valid_bs'], shuffle=False,
                         num_workers=CFG['num_workers'], pin_memory=True)

model = Model().to(device)
model = torch.load('/kaggle/input/track-weight/best_model.pt')
model.eval()

y_pred = []
with torch.no_grad():
    tk = tqdm(test_loader, total=len(test_loader))
    for step, batch in enumerate(tk):
        if step % 4 != 3:
            img, feature, label = [x.to(device) for x in batch]
            output1 = model(img, feature).squeeze(-1)
            output2 = model(img.flip(-1), feature).squeeze(-1)
            y_pred.extend(0.2*(output1.sigmoid().cpu().numpy()) + 0.8*(output2.sigmoid().cpu().numpy()))
        else:
            img, feature, label = [x.to(device) for x in batch]
            output = model(img.flip(-1), feature).squeeze(-1)
            y_pred.extend(output.sigmoid().cpu().numpy())

y_pred = np.array(y_pred)
Horizontal-flip TTA is used here; TTA acts as a kind of implicit model ensemble. Note the asymmetric 0.2/0.8 weighting between the original and flipped passes, and that every fourth batch runs only the flipped pass.
Submission
th = 0.29

test_filtered['contact'] = (y_pred >= th).astype('int')

sub = pd.read_csv('/kaggle/input/nfl-player-contact-detection/sample_submission.csv')
sub = sub.drop("contact", axis=1).merge(test_filtered[['contact_id', 'contact']],
                                        how='left', on='contact_id')
sub['contact'] = sub['contact'].fillna(0).astype('int')
sub[["contact_id", "contact"]].to_csv("submission.csv", index=False)
sub.head()
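The threshold th = 0.29 is hard-coded here. Since the competition metric is the Matthews correlation coefficient (matthews_corrcoef is already imported above), one way to pick it is a sweep over held-out predictions — a sketch assuming val_labels and val_preds arrays that this notebook does not actually compute:

# Hypothetical: val_labels / val_preds would come from a validation split.
ths = np.arange(0.1, 0.6, 0.01)
scores = [matthews_corrcoef(val_labels, (val_preds >= t).astype(int)) for t in ths]
best_th = ths[int(np.argmax(scores))]
print(best_th, max(scores))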
Inference code link and score
infer_code | Kaggle
Revised version
The version above didn't perform that well, so I retrained with ResNet-50 instead. The results, inference code, and weights are linked below:
infer_code | Kaggle
best_weight | Kaggle