Mirror of https://github.com/NohamR/Stage-2024.git (synced 2026-01-11 08:48:15 +00:00)
init test
test/yolov7-tracker/tracker/config_files/mot17.yaml | 32 lines (new file)
@@ -0,0 +1,32 @@
# Config file of MOT17 dataset

DATASET_ROOT: '/data/wujiapeng/datasets/MOT17' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
  - 'pedestrian'

CATEGORY_DICT:
  0: 'pedestrian'

CERTAIN_SEQS:
  -
IGNORE_SEQS: # Seqs you want to ignore
  -

YAML_DICT: '' # NOTE: ONLY for the yolo v5 model loader (func DetectMultiBackend)

TRACK_EVAL: # If you use TrackEval to evaluate, use these configs
  'DISPLAY_LESS_PROGRESS': False
  'GT_FOLDER': '/data/wujiapeng/datasets/MOT17/train'
  'TRACKERS_FOLDER': './tracker/results'
  'SKIP_SPLIT_FOL': True
  'TRACKER_SUB_FOLDER': ''
  'SEQ_INFO':
    'MOT17-02-SDP': null
    'MOT17-04-SDP': null
    'MOT17-05-SDP': null
    'MOT17-09-SDP': null
    'MOT17-10-SDP': null
    'MOT17-11-SDP': null
    'MOT17-13-SDP': null
  'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'
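These config files are read by track.py (added below in this commit), which resolves the file from the --dataset name. A minimal sketch of that loading step, assuming the script is run from the repository root:

    import yaml

    # 'mot17' must match the config file name, i.e. ./tracker/config_files/mot17.yaml
    with open('./tracker/config_files/mot17.yaml', 'r') as f:
        cfgs = yaml.load(f, Loader=yaml.FullLoader)

    print(cfgs['DATASET_ROOT'])      # '/data/wujiapeng/datasets/MOT17'
    print(cfgs['CATEGORY_DICT'][0])  # 'pedestrian' (YAML parses the key 0 as an int)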
test/yolov7-tracker/tracker/config_files/uavdt.yaml | 26 lines (new file)
@@ -0,0 +1,26 @@
# Config file of UAVDT dataset

DATASET_ROOT: '/data/wujiapeng/datasets/UAVDT' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
  - 'car'

CATEGORY_DICT:
  0: 'car'

CERTAIN_SEQS:
  -
IGNORE_SEQS: # Seqs you want to ignore
  -

YAML_DICT: './data/UAVDT.yaml' # NOTE: ONLY for the yolo v5 model loader (func DetectMultiBackend)

TRACK_EVAL: # If you use TrackEval to evaluate, use these configs
  'DISPLAY_LESS_PROGRESS': False
  'GT_FOLDER': '/data/wujiapeng/datasets/UAVDT/UAV-benchmark-M'
  'TRACKERS_FOLDER': './tracker/results'
  'SKIP_SPLIT_FOL': True
  'TRACKER_SUB_FOLDER': ''
  'SEQ_INFO':
    'M0101': 407
  'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'
test/yolov7-tracker/tracker/config_files/visdrone.yaml | 61 lines (new file)
@@ -0,0 +1,61 @@
# Config file of VisDrone dataset

DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
  - 'pedestrian'
  - 'people'
  - 'bicycle'
  - 'car'
  - 'van'
  - 'truck'
  - 'tricycle'
  - 'awning-tricycle'
  - 'bus'
  - 'motor'

CATEGORY_DICT:
  0: 'pedestrian'
  1: 'people'
  2: 'bicycle'
  3: 'car'
  4: 'van'
  5: 'truck'
  6: 'tricycle'
  7: 'awning-tricycle'
  8: 'bus'
  9: 'motor'

CERTAIN_SEQS:
  -

IGNORE_SEQS: # Seqs you want to ignore
  -

YAML_DICT: './data/Visdrone_all.yaml' # NOTE: ONLY for the yolo v5 model loader (func DetectMultiBackend)

TRACK_EVAL: # If you use TrackEval to evaluate, use these configs
  'DISPLAY_LESS_PROGRESS': False
  'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
  'TRACKERS_FOLDER': './tracker/results'
  'SKIP_SPLIT_FOL': True
  'TRACKER_SUB_FOLDER': ''
  'SEQ_INFO':
    'uav0000009_03358_v': 219
    'uav0000073_00600_v': 328
    'uav0000073_04464_v': 312
    'uav0000077_00720_v': 780
    'uav0000088_00290_v': 296
    'uav0000119_02301_v': 179
    'uav0000120_04775_v': 1000
    'uav0000161_00000_v': 308
    'uav0000188_00000_v': 260
    'uav0000201_00000_v': 677
    'uav0000249_00001_v': 360
    'uav0000249_02688_v': 244
    'uav0000297_00000_v': 146
    'uav0000297_02761_v': 373
    'uav0000306_00230_v': 420
    'uav0000355_00001_v': 468
    'uav0000370_00001_v': 265
  'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'
test/yolov7-tracker/tracker/config_files/visdrone_part.yaml | 51 lines (new file)
@@ -0,0 +1,51 @@
# Config file of VisDrone dataset

DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
  - 'pedestrian'
  - 'car'
  - 'van'
  - 'truck'
  - 'bus'

CATEGORY_DICT:
  0: 'pedestrian'
  1: 'car'
  2: 'van'
  3: 'truck'
  4: 'bus'

CERTAIN_SEQS:
  -

IGNORE_SEQS: # Seqs you want to ignore
  -

YAML_DICT: './data/Visdrone_all.yaml' # NOTE: ONLY for the yolo v5 model loader (func DetectMultiBackend)

TRACK_EVAL: # If you use TrackEval to evaluate, use these configs
  'DISPLAY_LESS_PROGRESS': False
  'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
  'TRACKERS_FOLDER': './tracker/results'
  'SKIP_SPLIT_FOL': True
  'TRACKER_SUB_FOLDER': ''
  'SEQ_INFO':
    'uav0000009_03358_v': 219
    'uav0000073_00600_v': 328
    'uav0000073_04464_v': 312
    'uav0000077_00720_v': 780
    'uav0000088_00290_v': 296
    'uav0000119_02301_v': 179
    'uav0000120_04775_v': 1000
    'uav0000161_00000_v': 308
    'uav0000188_00000_v': 260
    'uav0000201_00000_v': 677
    'uav0000249_00001_v': 360
    'uav0000249_02688_v': 244
    'uav0000297_00000_v': 146
    'uav0000297_02761_v': 373
    'uav0000306_00230_v': 420
    'uav0000355_00001_v': 468
    'uav0000370_00001_v': 265
  'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'
test/yolov7-tracker/tracker/my_timer.py | 37 lines (new file)
@@ -0,0 +1,37 @@
import time


class Timer(object):
    """A simple timer."""
    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

        self.duration = 0.

    def tic(self):
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            self.duration = self.average_time
        else:
            self.duration = self.diff
        return self.duration

    def clear(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.
        self.duration = 0.
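A usage sketch of this Timer, mirroring the per-frame tic/toc pattern that track.py uses (time.sleep stands in for real per-frame work):

    import time
    from my_timer import Timer

    timer = Timer()
    for _ in range(3):
        timer.tic()               # start timing one frame
        time.sleep(0.01)          # stand-in for detection + tracking work
        timer.toc()               # accumulates total_time and updates average_time
    print(timer.average_time)     # mean seconds per frame
    timer.clear()                 # reset before the next sequence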
test/yolov7-tracker/tracker/track.py | 305 lines (new file)
@@ -0,0 +1,305 @@
"""
main code for tracking
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml

from loguru import logger
import argparse

from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video
from my_timer import Timer

from tracker_dataloader import TestDataset

# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker
from trackers.strongsort_tracker import StrongSortTracker
from trackers.sparse_tracker import SparseTracker

# YOLOX modules
try:
    from yolox.exp import get_exp
    from yolox_utils.postprocess import postprocess_yolox
    from yolox.utils import fuse_model
except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
    pass

# YOLOv7 modules
try:
    sys.path.append(os.getcwd())
    from models.experimental import attempt_load
    from utils.torch_utils import select_device, time_synchronized, TracedModel
    from utils.general import non_max_suppression, scale_coords, check_img_size
    from yolov7_utils.postprocess import postprocess as postprocess_yolov7

except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
    pass

# YOLOv8 modules
try:
    from ultralytics import YOLO
    from yolov8_utils.postprocess import postprocess as postprocess_yolov8

except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
    pass

TRACKER_DICT = {
    'sort': SortTracker,
    'bytetrack': ByteTracker,
    'botsort': BotTracker,
    'c_bioutrack': C_BIoUTracker,
    'ocsort': OCSortTracker,
    'deepsort': DeepSortTracker,
    'strongsort': StrongSortTracker,
    'sparsetrack': SparseTracker
}

def get_args():

    parser = argparse.ArgumentParser()

    """general"""
    parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.')
    parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
    parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc.')
    parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')

    parser.add_argument('--kalman_format', type=str, default='default', help='which kind of Kalman filter to use: sort, deepsort, byte, etc.')
    parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')

    parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter tracks')
    parser.add_argument('--nms_thresh', type=float, default=0.7, help='threshold for NMS')
    parser.add_argument('--iou_thresh', type=float, default=0.5, help='IoU threshold to filter tracks')

    parser.add_argument('--device', type=str, default='6', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    """yolox"""
    parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')

    """model path"""
    parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
    parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLOv7')
    # other model paths
    parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path of the ReID model')
    parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path of the DHN model for DeepMOT')

    """other options"""
    parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only works for bot-sort etc., which need a ReID part')
    parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
    parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance distance')
    parser.add_argument('--min_area', type=float, default=150, help='used to filter small bboxes')

    parser.add_argument('--save_dir', type=str, default='track_results/{dataset_name}/{split}')
    parser.add_argument('--save_images', action='store_true', help='save tracking results (images)')
    parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')

    parser.add_argument('--track_eval', type=bool, default=True, help='use TrackEval to evaluate')

    return parser.parse_args()

def main(args, dataset_cfgs):

    """1. set some params"""

    # NOTE: if you save video, you must save images
    if args.save_videos:
        args.save_images = True

    """2. load detector"""
    device = select_device(args.device)

    if args.detector == 'yolox':

        exp = get_exp(args.yolox_exp_file, None)  # TODO: modify num_classes etc. for a specific dataset
        model_img_size = exp.input_size
        model = exp.get_model()
        model.to(device)
        model.eval()

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        ckpt = torch.load(args.detector_model_path, map_location=device)
        model.load_state_dict(ckpt['model'])
        logger.info("loaded checkpoint done")
        model = fuse_model(model)

        stride = None  # match with yolo v7

        logger.info(f'Now detector is on device {next(model.parameters()).device}')

    elif args.detector == 'yolov7':

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        model = attempt_load(args.detector_model_path, map_location=device)

        # get inference img size
        stride = int(model.stride.max())  # model stride
        model_img_size = check_img_size(args.img_size, s=stride)  # check img_size

        # Traced model
        model = TracedModel(model, device=device, img_size=args.img_size)
        # model.half()

        logger.info("loaded checkpoint done")

        logger.info(f'Now detector is on device {next(model.parameters()).device}')

    elif args.detector == 'yolov8':

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        model = YOLO(args.detector_model_path)

        model_img_size = [None, None]
        stride = None

        logger.info("loaded checkpoint done")

    else:
        logger.error(f"detector {args.detector} is not supported")
        exit(0)

    """3. load sequences"""
    DATA_ROOT = dataset_cfgs['DATASET_ROOT']
    SPLIT = dataset_cfgs['SPLIT']

    seqs = sorted(os.listdir(os.path.join(DATA_ROOT, 'images', SPLIT)))
    seqs = [seq for seq in seqs if seq not in dataset_cfgs['IGNORE_SEQS']]
    if None not in dataset_cfgs['CERTAIN_SEQS']:
        seqs = dataset_cfgs['CERTAIN_SEQS']

    logger.info(f'Total {len(seqs)} seqs will be tracked: {seqs}')

    save_dir = args.save_dir.format(dataset_name=args.dataset, split=SPLIT)


    """4. Tracking"""

    # set timer
    timer = Timer()
    seq_fps = []

    for seq in seqs:
        logger.info(f'--------------tracking seq {seq}--------------')

        dataset = TestDataset(DATA_ROOT, SPLIT, seq_name=seq, img_size=model_img_size, model=args.detector, stride=stride)

        data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

        tracker = TRACKER_DICT[args.tracker](args, )

        process_bar = enumerate(data_loader)
        process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)

        results = []

        for frame_idx, (ori_img, img) in process_bar:

            # start timing this frame
            timer.tic()

            if args.detector == 'yolov8':
                img = img.squeeze(0).cpu().numpy()

            else:
                img = img.to(device)  # (1, C, H, W)
                img = img.float()

            ori_img = ori_img.squeeze(0)

            # get detector output
            with torch.no_grad():
                if args.detector == 'yolov8':
                    output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
                else:
                    output = model(img)

            # postprocess output to original scales
            if args.detector == 'yolox':
                output = postprocess_yolox(output, len(dataset_cfgs['CATEGORY_NAMES']), conf_thresh=args.conf_thresh,
                                           img=img, ori_img=ori_img)

            elif args.detector == 'yolov7':
                output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)

            elif args.detector == 'yolov8':
                output = postprocess_yolov8(output)

            else: raise NotImplementedError

            # output: (tlbr, conf, cls)
            # convert tlbr to tlwh
            if isinstance(output, torch.Tensor):
                output = output.detach().cpu().numpy()
            output[:, 2] -= output[:, 0]
            output[:, 3] -= output[:, 1]
            current_tracks = tracker.update(output, img, ori_img.cpu().numpy())

            # save results
            cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
            for trk in current_tracks:
                bbox = trk.tlwh
                id = trk.track_id
                cls = trk.category
                score = trk.score

                # filter out low-area bboxes
                if bbox[2] * bbox[3] > args.min_area:
                    cur_tlwh.append(bbox)
                    cur_id.append(id)
                    cur_cls.append(cls)
                    cur_score.append(score)
                    # results.append((frame_id + 1, id, bbox, cls))

            results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))

            timer.toc()

            if args.save_images:
                plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
                         save_dir=os.path.join(save_dir, 'vis_results'))

        save_results(folder_name=os.path.join(args.dataset, SPLIT),
                     seq_name=seq,
                     results=results)

        # show the fps
        seq_fps.append(frame_idx / timer.total_time)
        logger.info(f'fps of seq {seq}: {seq_fps[-1]}')
        timer.clear()

        if args.save_videos:
            save_video(images_path=os.path.join(save_dir, 'vis_results'))
            logger.info(f'save video of {seq} done')

    # show the average fps
    logger.info(f'average fps: {np.mean(seq_fps)}')


if __name__ == '__main__':

    args = get_args()

    with open(f'./tracker/config_files/{args.dataset}.yaml', 'r') as f:
        cfgs = yaml.load(f, Loader=yaml.FullLoader)

    main(args, cfgs)
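One detail worth calling out in the loop above: the trackers consume boxes in tlwh format, while the post-processed detector output is tlbr, so columns 2 and 3 are rewritten in place. A standalone sketch of that conversion:

    import numpy as np

    # one detection row in (x1, y1, x2, y2, conf, cls) format
    output = np.array([[10., 20., 50., 80., 0.9, 0.]])
    output[:, 2] -= output[:, 0]  # width  = x2 - x1
    output[:, 3] -= output[:, 1]  # height = y2 - y1
    # output is now (x, y, w, h, conf, cls): [[10. 20. 40. 60. 0.9 0.]]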
test/yolov7-tracker/tracker/track_demo.py | 266 lines (new file)
@@ -0,0 +1,266 @@
"""
main code for the tracking demo
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml

from loguru import logger
import argparse

from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video

from tracker_dataloader import TestDataset, DemoDataset

# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker

# YOLOX modules
try:
    from yolox.exp import get_exp
    from yolox_utils.postprocess import postprocess_yolox
    from yolox.utils import fuse_model
except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
    pass

# YOLOv7 modules
try:
    sys.path.append(os.getcwd())
    from models.experimental import attempt_load
    from utils.torch_utils import select_device, time_synchronized, TracedModel
    from utils.general import non_max_suppression, scale_coords, check_img_size
    from yolov7_utils.postprocess import postprocess as postprocess_yolov7

except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
    pass

# YOLOv8 modules
try:
    from ultralytics import YOLO
    from yolov8_utils.postprocess import postprocess as postprocess_yolov8

except Exception as e:
    logger.warning(e)
    logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
    pass

TRACKER_DICT = {
    'sort': SortTracker,
    'bytetrack': ByteTracker,
    'botsort': BotTracker,
    'c_bioutrack': C_BIoUTracker,
    'ocsort': OCSortTracker,
    'deepsort': DeepSortTracker
}

def get_args():

    parser = argparse.ArgumentParser()

    """general"""
    parser.add_argument('--obj', type=str, required=True, default='demo.mp4', help='video or images folder PATH')

    parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
    parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc.')
    parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')

    parser.add_argument('--kalman_format', type=str, default='default', help='which kind of Kalman filter to use: sort, deepsort, byte, etc.')
    parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')

    parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter tracks')
    parser.add_argument('--nms_thresh', type=float, default=0.7, help='threshold for NMS')
    parser.add_argument('--iou_thresh', type=float, default=0.5, help='IoU threshold to filter tracks')

    parser.add_argument('--device', type=str, default='6', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    """yolox"""
    parser.add_argument('--num_classes', type=int, default=1)
    parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')

    """model path"""
    parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
    parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLOv7')
    # other model paths
    parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path of the ReID model')
    parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path of the DHN model for DeepMOT')

    """other options"""
    parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only works for bot-sort etc., which need a ReID part')
    parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
    parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance distance')
    parser.add_argument('--min_area', type=float, default=150, help='used to filter small bboxes')

    parser.add_argument('--save_dir', type=str, default='track_demo_results')
    parser.add_argument('--save_images', action='store_true', help='save tracking results (images)')
    parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')

    parser.add_argument('--track_eval', type=bool, default=True, help='use TrackEval to evaluate')

    return parser.parse_args()

def main(args):

    """1. set some params"""

    # NOTE: if you save video, you must save images
    if args.save_videos:
        args.save_images = True

    """2. load detector"""
    device = select_device(args.device)

    if args.detector == 'yolox':

        exp = get_exp(args.yolox_exp_file, None)  # TODO: modify num_classes etc. for a specific dataset
        model_img_size = exp.input_size
        model = exp.get_model()
        model.to(device)
        model.eval()

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        ckpt = torch.load(args.detector_model_path, map_location=device)
        model.load_state_dict(ckpt['model'])
        logger.info("loaded checkpoint done")
        model = fuse_model(model)

        stride = None  # match with yolo v7

        logger.info(f'Now detector is on device {next(model.parameters()).device}')

    elif args.detector == 'yolov7':

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        model = attempt_load(args.detector_model_path, map_location=device)

        # get inference img size
        stride = int(model.stride.max())  # model stride
        model_img_size = check_img_size(args.img_size, s=stride)  # check img_size

        # Traced model
        model = TracedModel(model, device=device, img_size=args.img_size)
        # model.half()

        logger.info("loaded checkpoint done")

        logger.info(f'Now detector is on device {next(model.parameters()).device}')

    elif args.detector == 'yolov8':

        logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
        model = YOLO(args.detector_model_path)

        model_img_size = [None, None]
        stride = None

        logger.info("loaded checkpoint done")

    else:
        logger.error(f"detector {args.detector} is not supported")
        exit(0)

    """3. load sequences"""

    dataset = DemoDataset(file_name=args.obj, img_size=model_img_size, model=args.detector, stride=stride, )
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

    tracker = TRACKER_DICT[args.tracker](args, )


    save_dir = args.save_dir

    process_bar = enumerate(data_loader)
    process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)

    results = []

    """4. Tracking"""

    for frame_idx, (ori_img, img) in process_bar:
        if args.detector == 'yolov8':
            img = img.squeeze(0).cpu().numpy()

        else:
            img = img.to(device)  # (1, C, H, W)
            img = img.float()

        ori_img = ori_img.squeeze(0)

        # get detector output
        with torch.no_grad():
            if args.detector == 'yolov8':
                output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
            else:
                output = model(img)

        # postprocess output to original scales
        if args.detector == 'yolox':
            output = postprocess_yolox(output, args.num_classes, conf_thresh=args.conf_thresh,
                                       img=img, ori_img=ori_img)

        elif args.detector == 'yolov7':
            output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)

        elif args.detector == 'yolov8':
            output = postprocess_yolov8(output)

        else: raise NotImplementedError

        # output: (tlbr, conf, cls)
        # convert tlbr to tlwh
        if isinstance(output, torch.Tensor):
            output = output.detach().cpu().numpy()
        output[:, 2] -= output[:, 0]
        output[:, 3] -= output[:, 1]
        current_tracks = tracker.update(output, img, ori_img.cpu().numpy())

        # save results
        cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
        for trk in current_tracks:
            bbox = trk.tlwh
            id = trk.track_id
            cls = trk.category
            score = trk.score

            # filter out low-area bboxes
            if bbox[2] * bbox[3] > args.min_area:
                cur_tlwh.append(bbox)
                cur_id.append(id)
                cur_cls.append(cls)
                cur_score.append(score)
                # results.append((frame_id + 1, id, bbox, cls))

        results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))

        if args.save_images:
            plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
                     save_dir=os.path.join(save_dir, 'vis_results'))

    save_results(folder_name=os.path.join(save_dir, 'txt_results'),
                 seq_name='demo',
                 results=results)

    if args.save_videos:
        save_video(images_path=os.path.join(save_dir, 'vis_results'))
        logger.info('save video done')


if __name__ == '__main__':

    args = get_args()

    main(args)
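Both scripts select the tracker class by name through TRACKER_DICT; a sketch of that dispatch, assuming args is the namespace returned by get_args():

    tracker_cls = TRACKER_DICT['bytetrack']  # -> ByteTracker
    tracker = tracker_cls(args)              # reads conf_thresh, track_buffer, kalman_format, ...
    # then, per frame: current_tracks = tracker.update(detections, img, ori_img)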
test/yolov7-tracker/tracker/tracker_dataloader.py | 223 lines (new file)
@@ -0,0 +1,223 @@
import numpy as np
import torch
import cv2
import os
import os.path as osp

from torch.utils.data import Dataset


class TestDataset(Dataset):
    """ This class generates the origin image and the preprocessed image for inference
    NOTE: for every sequence, initialize a TestDataset instance

    """

    def __init__(self, data_root, split, seq_name, img_size=[640, 640], legacy_yolox=True, model='yolox', **kwargs) -> None:
        """
        Args:
            data_root: path of the entire dataset
            seq_name: name of the sequence
            img_size: List[int, int] | Tuple[int, int], image size for the detection model
            legacy_yolox: bool, to be compatible with older versions of yolox
            model: detection model, currently supports yolox, yolov7, yolov8
        """
        super().__init__()

        self.model = model

        self.data_root = data_root
        self.seq_name = seq_name
        self.img_size = img_size
        self.split = split

        self.seq_path = osp.join(self.data_root, 'images', self.split, self.seq_name)
        self.imgs_in_seq = sorted(os.listdir(self.seq_path))

        self.legacy = legacy_yolox

        self.other_param = kwargs

    def __getitem__(self, idx):

        if self.model == 'yolox':
            return self._getitem_yolox(idx)
        elif self.model == 'yolov7':
            return self._getitem_yolov7(idx)
        elif self.model == 'yolov8':
            return self._getitem_yolov8(idx)

    def _getitem_yolox(self, idx):

        img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))
        img_resized, _ = self._preprocess_yolox(img, self.img_size, )
        if self.legacy:
            img_resized = img_resized[::-1, :, :].copy()  # BGR -> RGB
            img_resized /= 255.0
            img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)

        return torch.from_numpy(img), torch.from_numpy(img_resized)

    def _getitem_yolov7(self, idx):

        img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))

        img_resized = self._preprocess_yolov7(img, )  # torch.Tensor

        return torch.from_numpy(img), img_resized

    def _getitem_yolov8(self, idx):

        img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))  # (h, w, c)
        # img = self._preprocess_yolov8(img)

        return torch.from_numpy(img), torch.from_numpy(img)


    def _preprocess_yolox(self, img, size, swap=(2, 0, 1)):
        """ convert the origin image to a resized image, YOLOX-manner

        Args:
            img: np.ndarray
            size: List[int, int] | Tuple[int, int]
            swap: (H, W, C) -> (C, H, W)

        Returns:
            np.ndarray, float

        """
        if len(img.shape) == 3:
            padded_img = np.ones((size[0], size[1], 3), dtype=np.uint8) * 114
        else:
            padded_img = np.ones(size, dtype=np.uint8) * 114

        r = min(size[0] / img.shape[0], size[1] / img.shape[1])
        resized_img = cv2.resize(
            img,
            (int(img.shape[1] * r), int(img.shape[0] * r)),
            interpolation=cv2.INTER_LINEAR,
        ).astype(np.uint8)
        padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

        padded_img = padded_img.transpose(swap)
        padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
        return padded_img, r

    def _preprocess_yolov7(self, img, ):

        img_resized = self._letterbox(img, new_shape=self.img_size, stride=self.other_param['stride'], )[0]
        img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
        img_resized = np.ascontiguousarray(img_resized)

        img_resized = torch.from_numpy(img_resized).float()
        img_resized /= 255.0

        return img_resized

    def _preprocess_yolov8(self, img, ):

        img = img.transpose((2, 0, 1))
        img = np.ascontiguousarray(img)

        return img


    def _letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
        # Resize and pad the image while meeting stride-multiple constraints
        shape = img.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better test mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if auto:  # minimum rectangle
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
        elif scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        dw /= 2  # divide padding into 2 sides
        dh /= 2

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return img, ratio, (dw, dh)

    def __len__(self, ):
        return len(self.imgs_in_seq)


class DemoDataset(TestDataset):
    """
    dataset for the demo
    """
    def __init__(self, file_name, img_size=[640, 640], model='yolox', legacy_yolox=True, **kwargs) -> None:

        self.file_name = file_name
        self.model = model
        self.img_size = img_size

        self.is_video = '.mp4' in file_name or '.avi' in file_name

        if not self.is_video:
            self.imgs_in_seq = sorted(os.listdir(file_name))
        else:
            self.imgs_in_seq = []
            self.cap = cv2.VideoCapture(file_name)

            while True:
                ret, frame = self.cap.read()
                if not ret: break

                self.imgs_in_seq.append(frame)

        self.legacy = legacy_yolox

    def __getitem__(self, idx):

        if not self.is_video:
            img = cv2.imread(osp.join(self.file_name, self.imgs_in_seq[idx]))
        else:
            img = self.imgs_in_seq[idx]

        if self.model == 'yolox':
            return self._getitem_yolox(img)
        elif self.model == 'yolov7':
            return self._getitem_yolov7(img)
        elif self.model == 'yolov8':
            return self._getitem_yolov8(img)

    def _getitem_yolox(self, img):

        img_resized, _ = self._preprocess_yolox(img, self.img_size, )
        if self.legacy:
            img_resized = img_resized[::-1, :, :].copy()  # BGR -> RGB
            img_resized /= 255.0
            img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)

        return torch.from_numpy(img), torch.from_numpy(img_resized)

    def _getitem_yolov7(self, img):

        img_resized = self._preprocess_yolov7(img, )  # torch.Tensor

        return torch.from_numpy(img), img_resized

    def _getitem_yolov8(self, img):

        # img = self._preprocess_yolov8(img)

        return torch.from_numpy(img), torch.from_numpy(img)
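For the legacy YOLOX path in _getitem_yolox above, the per-channel normalization is the usual ImageNet scheme applied after rescaling to [0, 1]; a self-contained sketch on a dummy (C, H, W) float image:

    import numpy as np

    img = np.random.randint(0, 255, size=(3, 640, 640)).astype(np.float32)
    img /= 255.0                                              # [0, 255] -> [0, 1]
    img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)   # subtract ImageNet mean
    img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)   # divide by ImageNet std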
test/yolov7-tracker/tracker/trackers/basetrack.py | 133 lines (new file)
@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict


class TrackState(object):
    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack(object):
    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        return self.frame_id

    @staticmethod
    def next_id():
        BaseTrack._count += 1
        return BaseTrack._count

    def activate(self, *args):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        raise NotImplementedError

    def mark_lost(self):
        self.state = TrackState.Lost

    def mark_removed(self):
        self.state = TrackState.Removed

    @property
    def tlwh(self):
        """Get the current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @property
    def xywh(self):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2.0
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret

    @staticmethod
    def tlwh_to_xysa(tlwh):
        """Convert bounding box to format `(center x, center y, area,
        aspect ratio)`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] = tlwh[2] * tlwh[3]
        ret[3] = tlwh[2] / tlwh[3]
        return ret

    def to_xyah(self):
        return self.tlwh_to_xyah(self.tlwh)

    def to_xywh(self):
        return self.tlwh_to_xywh(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
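The box-format helpers above are plain numpy; for example, a 10x20 box with its top-left corner at (5, 5), assuming the trackers package is importable:

    import numpy as np
    from trackers.basetrack import BaseTrack

    tlwh = np.array([5., 5., 10., 20.])
    print(BaseTrack.tlwh_to_tlbr(tlwh))  # [ 5.  5. 15. 25.]
    print(BaseTrack.tlwh_to_xyah(tlwh))  # [10.  15.   0.5 20. ] -> center, w/h ratio, height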
test/yolov7-tracker/tracker/trackers/botsort_tracker.py | 329 lines (new file)
@@ -0,0 +1,329 @@
"""
BoT-SORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

from .camera_motion_compensation import GMC

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model

class BotTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, 128, 128)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])


        # camera motion compensation module
        self.gmc = GMC(method='orb', downscale=2, verbose=None)

    def reid_preprocess(self, obj_bbox):
        """
        preprocess cropped object bboxes

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 128)
        """
        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128))  # shape: (128, 128, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))
            # if any(tlbr_ == -1 for tlbr_ in tlwh):
            #     print(tlwh)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()


    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size) in tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        """Step 1: Extract reid features"""
        if self.with_reid:
            features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            if self.with_reid:
                detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                              (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
            else:
                detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                              (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        ious_dists = iou_distance(tracklet_pool, detections)
        ious_dists_mask = (ious_dists > 0.5)  # high-confidence IoU

        if self.with_reid:
            # mixed cost matrix
            emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)

        else:
            dists = ious_dists

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the untracked tracklets with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)


        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        ious_dists = iou_distance(unconfirmed, detections)
        ious_dists_mask = (ious_dists > 0.5)

        if self.with_reid:
            emb_dists = embedding_distance(unconfirmed, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)
        else:
            dists = ious_dists


        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
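The score split at the top of update() is the ByteTrack-style two-stage association: high-confidence boxes are matched first, and mid-confidence boxes (0.1 < s < conf_thresh) get a second chance against the leftover tracklets. A sketch, assuming conf_thresh = 0.5:

    import numpy as np

    scores = np.array([0.9, 0.4, 0.05, 0.6])
    conf_thresh = 0.5
    remain_inds = scores > conf_thresh                                # [ True False False  True]
    inds_second = np.logical_and(scores > 0.1, scores < conf_thresh)  # [False  True False False]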
test/yolov7-tracker/tracker/trackers/byte_tracker.py | 201 lines (new file)
@@ -0,0 +1,201 @@
|
||||
"""
|
||||
ByteTrack
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
from .basetrack import BaseTrack, TrackState
|
||||
from .tracklet import Tracklet
|
||||
from .matching import *
|
||||
|
||||
class ByteTracker(object):
|
||||
def __init__(self, args, frame_rate=30):
|
||||
self.tracked_tracklets = [] # type: list[Tracklet]
|
||||
self.lost_tracklets = [] # type: list[Tracklet]
|
||||
self.removed_tracklets = [] # type: list[Tracklet]
|
||||
|
||||
self.frame_id = 0
|
||||
self.args = args
|
||||
|
||||
self.det_thresh = args.conf_thresh + 0.1
|
||||
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
|
||||
self.max_time_lost = self.buffer_size
|
||||
|
||||
self.motion = args.kalman_format
|
||||
|
||||
def update(self, output_results, img, ori_img):
|
||||
"""
|
||||
output_results: processed detections (scale to original size) tlbr format
|
||||
"""
|
||||
|
||||
self.frame_id += 1
|
||||
activated_tracklets = []
|
||||
refind_tracklets = []
|
||||
lost_tracklets = []
|
||||
removed_tracklets = []
|
||||
|
||||
scores = output_results[:, 4]
|
||||
bboxes = output_results[:, :4]
|
||||
categories = output_results[:, -1]
|
||||
|
||||
remain_inds = scores > self.args.conf_thresh
|
||||
inds_low = scores > 0.1
|
||||
inds_high = scores < self.args.conf_thresh
|
||||
|
||||
inds_second = np.logical_and(inds_low, inds_high)
|
||||
dets_second = bboxes[inds_second]
|
||||
dets = bboxes[remain_inds]
|
||||
|
||||
cates = categories[remain_inds]
|
||||
cates_second = categories[inds_second]
|
||||
|
||||
scores_keep = scores[remain_inds]
|
||||
scores_second = scores[inds_second]
|
||||
|
||||
if len(dets) > 0:
|
||||
'''Detections'''
|
||||
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
|
||||
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
|
||||
else:
|
||||
detections = []
|
||||
|
||||
''' Add newly detected tracklets to tracked_tracklets'''
|
||||
unconfirmed = []
|
||||
tracked_tracklets = [] # type: list[Tracklet]
|
||||
for track in self.tracked_tracklets:
|
||||
if not track.is_activated:
|
||||
unconfirmed.append(track)
|
||||
else:
|
||||
tracked_tracklets.append(track)
|
||||
|
||||
''' Step 2: First association, with high score detection boxes'''
|
||||
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
|
||||
|
||||
# Predict the current location with Kalman
|
||||
for tracklet in tracklet_pool:
|
||||
tracklet.predict()
|
||||
|
||||
dists = iou_distance(tracklet_pool, detections)
|
||||
|
||||
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
|
||||
|
||||
for itracked, idet in matches:
|
||||
track = tracklet_pool[itracked]
|
||||
det = detections[idet]
|
||||
if track.state == TrackState.Tracked:
|
||||
track.update(detections[idet], self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
else:
|
||||
track.re_activate(det, self.frame_id, new_id=False)
|
||||
refind_tracklets.append(track)
|
||||
|
||||
''' Step 3: Second association, with low score detection boxes'''
|
||||
# association the untrack to the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4 - t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
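

# Note on remove_duplicate_tracklets: pairs whose IoU distance is below 0.15
# (i.e. IoU > 0.85) are treated as the same target; the tracklet that has been
# alive longer (frame_id - start_frame) is kept and the younger duplicate is
# dropped. A minimal sketch of the decision rule, assuming two overlapping
# tracklets `ta` (alive 30 frames) and `tb` (alive 5 frames):
#
#     timep, timeq = 30, 5
#     keep_a = timep > timeq   # True -> tb is marked as the duplicate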
204
test/yolov7-tracker/tracker/trackers/c_biou_tracker.py
Normal file
@@ -0,0 +1,204 @@
"""
C_BIoU Track
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *

class C_BIoUTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = buffered_iou_distance(tracklet_pool, detections, level=1)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the remaining unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]

        dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)

        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = buffered_iou_distance(unconfirmed, detections, level=1)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4 - t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt

"""GMC Module"""
class GMC:
    def __init__(self, method='orb', downscale=2, verbose=None):
        super(GMC, self).__init__()

        self.method = method
        self.downscale = max(1, int(downscale))

        if self.method == 'orb':
            self.detector = cv2.FastFeatureDetector_create(20)
            self.extractor = cv2.ORB_create()
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        elif self.method == 'sift':
            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

        elif self.method == 'ecc':
            number_of_iterations = 100
            termination_eps = 1e-5
            self.warp_mode = cv2.MOTION_EUCLIDEAN
            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

        elif self.method == 'file' or self.method == 'files':
            seqName = verbose[0]
            ablation = verbose[1]
            if ablation:
                filePath = r'tracker/GMC_files/MOT17_ablation'
            else:
                filePath = r'tracker/GMC_files/MOTChallenge'

            if '-FRCNN' in seqName:
                seqName = seqName[:-6]
            elif '-DPM' in seqName:
                seqName = seqName[:-4]
            elif '-SDP' in seqName:
                seqName = seqName[:-4]

            self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')

            if self.gmcFile is None:
                raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
        elif self.method == 'none' or self.method == 'None':
            self.method = 'none'
        else:
            raise ValueError("Error: Unknown CMC method:" + method)

        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None

        self.initializedFirstFrame = False

    def apply(self, raw_frame, detections=None):
        if self.method == 'orb' or self.method == 'sift':
            return self.applyFeatures(raw_frame, detections)
        elif self.method == 'ecc':
            return self.applyEcc(raw_frame, detections)
        elif self.method == 'file':
            return self.applyFile(raw_frame, detections)
        elif self.method == 'none':
            return np.eye(2, 3)
        else:
            return np.eye(2, 3)
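
    # Note: every apply* branch returns a 2x3 affine warp H = [R | t] mapping
    # the previous frame into the current one; np.eye(2, 3) is the identity
    # warp (no camera motion). Illustrative use, assuming a point p = (x, y)
    # in the previous frame:
    #
    #     p_curr = H[:2, :2] @ np.array([x, y]) + H[:2, 2]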

    def applyEcc(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3, dtype=np.float32)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Run the ECC algorithm. The result is stored in H.
        # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
        try:
            (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
        except:
            print('Warning: find transform failed. Set warp as identity')

        return H

    def applyFeatures(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # find the keypoints
        mask = np.zeros_like(frame)
        # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
        mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
        if detections is not None:
            for det in detections:
                tlbr = (det[:4] / self.downscale).astype(np.int_)
                mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0

        keypoints = self.detector.detect(frame, mask)

        # compute the descriptors
        keypoints, descriptors = self.extractor.compute(frame, keypoints)

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Match descriptors.
        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

        # Filter matches based on smallest spatial distance
        matches = []
        spatialDistances = []

        maxSpatialDistance = 0.25 * np.array([width, height])

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            return H

        for m, n in knnMatches:
            if m.distance < 0.9 * n.distance:
                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
                                   prevKeyPointLocation[1] - currKeyPointLocation[1])

                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
                        (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
                    spatialDistances.append(spatialDistance)
                    matches.append(m)

        meanSpatialDistances = np.mean(spatialDistances, 0)
        stdSpatialDistances = np.std(spatialDistances, 0)

        inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # Draw the keypoint matches on the output image
        if 0:
            matches_img = np.hstack((self.prevFrame, frame))
            matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
            W = np.size(self.prevFrame, 1)
            for m in goodMatches:
                prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
                curr_pt[0] += W
                color = np.random.randint(0, 255, (3,))
                color = (int(color[0]), int(color[1]), int(color[2]))

                matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
                matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
                matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)

            plt.figure()
            plt.imshow(matches_img)
            plt.show()

        # Find rigid matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, method=cv2.RANSAC)

            # Handle downscale
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            print('Warning: not enough matching points')

        # Store to next iteration
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        return H
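
    # Note (hedged): when downscale > 1 the features are matched on the resized
    # frame, so only the translation part H[:, 2] is scaled back to the original
    # resolution above; the linear block H[:2, :2] of the similarity transform
    # estimated by cv2.estimateAffinePartial2D is unchanged by a uniform
    # rescaling of both coordinate frames.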

    def applyFile(self, raw_frame, detections=None):
        line = self.gmcFile.readline()
        tokens = line.split("\t")
        H = np.eye(2, 3, dtype=np.float64)
        H[0, 0] = float(tokens[1])
        H[0, 1] = float(tokens[2])
        H[0, 2] = float(tokens[3])
        H[1, 0] = float(tokens[4])
        H[1, 1] = float(tokens[5])
        H[1, 2] = float(tokens[6])

        return H

    @staticmethod
    def multi_gmc(stracks, H=np.eye(2, 3)):
        """
        GMC module prediction
        :param stracks: List[Strack]
        """
        if len(stracks) > 0:
            multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
            multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])

            R = H[:2, :2]
            R8x8 = np.kron(np.eye(4, dtype=float), R)
            t = H[:2, 2]

            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                mean = R8x8.dot(mean)
                mean[:2] += t
                cov = R8x8.dot(cov).dot(R8x8.transpose())

                stracks[i].kalman_filter.kf.x = mean
                stracks[i].kalman_filter.kf.P = cov
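
    # Sketch of the warp applied above (hedged): for an 8-dim state
    # x = [cx, cy, w, h, vcx, vcy, vw, vh] (or the x-y-a-h variant), the
    # block-diagonal R8x8 = kron(I4, R) applies the 2x2 camera rotation R to
    # each consecutive pair of state components, and the translation t is
    # added to the position part only:
    #
    #     x' = R8x8 @ x;  x'[:2] += t;  P' = R8x8 @ P @ R8x8.T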
327
test/yolov7-tracker/tracker/trackers/deepsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
Deep Sort
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class DeepSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, 128, 256)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)

    def reid_preprocess(self, obj_bbox):
        """
        preprocess a cropped object bbox

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, h, w), resized to self.bbox_crop_size
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()
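
    # Usage sketch (illustrative, assuming a constructed tracker `self` and a
    # BGR frame `ori_img` of shape (H, W, 3)):
    #
    #     feats = self.get_feature(tlwhs=np.array([[10, 20, 50, 120]]), ori_img=ori_img)
    #     # feats.shape == (1, feature_dim); rows are appearance embeddings
    #     # compared with the cosine metric inside gated_metric() below.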

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
                                                         matching_thresh=0.9,
                                                         cascade_depth=30,
                                                         tracks=tracklet_pool,
                                                         detections=detections
                                                         )

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with iou'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4 - t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first calculate the appearance cost, then filter by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_appearance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by calculating the Kalman state distance, constrained by
        the 0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large constant that marks associations as infeasible
        only_position: use [xc, yc, a, h] as the state vector or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_appearance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements, )
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost
        return cost_matrix
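
    # Note on the gate: with only_position=False the squared Mahalanobis
    # distance is chi-square distributed with 4 degrees of freedom, so
    # chi2inv95[4] = 9.4877 keeps roughly 95% of true associations while
    # cutting off implausible ones. Illustrative effect on one row:
    #
    #     cost = [0.10, 0.12, 0.40]
    #     maha = [3.2, 15.0, 1.1]     # gating_distance per detection
    #     # -> [0.10, 1e5, 1e5]: the 2nd entry fails the chi-square gate,
    #     #    the 3rd exceeds max_appearance_thresh = 0.15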


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy.linalg

class BaseKalman:

    def __init__(self,
                 state_dim: int = 8,
                 observation_dim: int = 4,
                 F: np.ndarray = np.zeros((0, )),
                 P: np.ndarray = np.zeros((0, )),
                 Q: np.ndarray = np.zeros((0, )),
                 H: np.ndarray = np.zeros((0, )),
                 R: np.ndarray = np.zeros((0, )),
                 ) -> None:

        self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
        if F.shape[0] > 0: self.kf.F = F  # if valid
        if P.shape[0] > 0: self.kf.P = P
        if Q.shape[0] > 0: self.kf.Q = Q
        if H.shape[0] > 0: self.kf.H = H
        if R.shape[0] > 0: self.kf.R = R

    def initialize(self, observation):
        raise NotImplementedError

    def predict(self, ):
        self.kf.predict()

    def update(self, observation, **kwargs):
        self.kf.update(observation, **kwargs)

    def get_state(self, ):
        return self.kf.x

    def gating_distance(self, measurements, only_position=False):
        """Compute gating distance between state distribution and measurements.
        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.
        Parameters
        ----------
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements; note the format (whether xywh, xyah or other)
            should be identical to the state definition
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.
        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.
        """

        # map state space to measurement space
        mean = self.kf.x.copy()
        mean = np.dot(self.kf.H, mean)
        covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))

        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
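
    # Worked sketch (hedged): solving L z = d with the Cholesky factor L of the
    # projected covariance S = H P H^T gives z = L^{-1} d, so sum(z * z, axis=0)
    # equals d^T S^{-1} d, the squared Mahalanobis distance, without forming
    # S^{-1} explicitly. E.g. with S = I and d = (3, 4), the distance is
    # 3^2 + 4^2 = 25.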
@@ -0,0 +1,99 @@
from .base_kalman import BaseKalman
import numpy as np
import cv2

class BotKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, w, h, vx, vy, vw, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-w-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[2],  # related to w and h
            2 * self._std_weight_position * observation[3],
            2 * self._std_weight_position * observation[2],
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-w-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n

        """

        std = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)
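

# Minimal usage sketch (illustrative; assumes a detection in x-y-w-h format):
#
#     kf = BotKalman()
#     kf.initialize(np.array([100., 200., 50., 120.]))   # x_{0,0}, P_{0,0}
#     kf.predict()                                       # x_{1,0} = F x_{0,0}
#     kf.update(np.array([103., 198., 51., 122.]))       # fuse the measurement
#     state = kf.get_state()                             # 8-dim [position, velocity]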
@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np

class ByteKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-a-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n

        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)
@@ -0,0 +1,144 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy

class OCSORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # s + vs
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def _freeze(self, ):
        """ freeze all the params of the Kalman filter

        """
        self.attr_saved = deepcopy(self.kf.__dict__)

    def _unfreeze(self, ):
        """ when a lost object is observed again, replay a virtual trajectory

        """
        if self.attr_saved is not None:
            new_history = deepcopy(self.history_obs)
            self.kf.__dict__ = self.attr_saved

            self.history_obs = self.history_obs[:-1]

            occur = [int(d is None) for d in new_history]
            indices = np.where(np.array(occur) == 0)[0]
            index1 = indices[-2]
            index2 = indices[-1]
            box1 = new_history[index1]
            x1, y1, s1, r1 = box1
            w1 = np.sqrt(s1 * r1)
            h1 = np.sqrt(s1 / r1)
            box2 = new_history[index2]
            x2, y2, s2, r2 = box2
            w2 = np.sqrt(s2 * r2)
            h2 = np.sqrt(s2 / r2)
            time_gap = index2 - index1
            dx = (x2 - x1) / time_gap
            dy = (y2 - y1) / time_gap
            dw = (w2 - w1) / time_gap
            dh = (h2 - h1) / time_gap
            for i in range(index2 - index1):
                """
                The default virtual trajectory generation is by linear
                motion (constant speed hypothesis); you could modify this
                part to implement your own.
                """
                x = x1 + (i + 1) * dx
                y = y1 + (i + 1) * dy
                w = w1 + (i + 1) * dw
                h = h1 + (i + 1) * dh
                s = w * h
                r = w / float(h)
                new_box = np.array([x, y, s, r]).reshape((4, 1))
                """
                I still use the predict-update loop here to refresh the parameters,
                but this can be made faster by directly modifying the internal
                parameters, as suggested in the paper. I keep this naive but slow
                way for easy reading and understanding.
                """
                self.kf.update(new_box)
                if not i == (index2 - index1 - 1):
                    self.kf.predict()
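
    # Note (hedged): this "observation re-update" replays a linearly
    # interpolated virtual trajectory between the last two real observations
    # once a lost target reappears. E.g. boxes observed at frames 10 and 14
    # with centers x1 = 100 and x2 = 120 yield dx = (120 - 100) / 4 = 5, so
    # virtual updates at x = 105, 110, 115, 120 are fed through the
    # predict-update loop above.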

    def update(self, z):
        """ update step

        For simplicity, modify self.kf directly, since OC-SORT changes the
        internal Kalman state.

        Args:
            z: observation x-y-s-a format
        """

        self.history_obs.append(z)

        if z is None:
            if self.observed:
                self._freeze()
            self.observed = False

            self.kf.update(z)

        else:
            if not self.observed:  # got an observation again; use online smoothing to re-update the parameters
                self._unfreeze()

            self.kf.update(z)

            self.observed = True
@@ -0,0 +1,73 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy

class SORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # s + vs
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-s-a format
        """

        self.kf.update(z)
@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np

class NSAKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z, score):
        """ update step

        Args:
            z: observation x-y-a-h format
            score: the detection score/confidence required by the NSA Kalman filter

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n

        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        # NSA: scale the measurement noise by the detection confidence
        std = [(1. - score) * x for x in std]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)
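
    # Note: the NSA (Noise Scale Adaptive) step above shrinks the measurement
    # noise for confident detections: R is scaled by (1 - score)^2 because the
    # std is multiplied by (1 - score) before squaring. Illustrative effect:
    # a detection with score 0.9 gets a 100x smaller R (stronger correction)
    # than the plain Kalman update, while score 0.0 leaves R unchanged.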
@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np

class UCMCKalman(BaseKalman):
    def __init__(self, ):

        state_dim = 8
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160
388
test/yolov7-tracker/tracker/trackers/matching.py
Normal file
@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy.sparse
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time

chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


def merge_matches(m1, m2, shape):
    O, P, Q = shape
    m1 = np.asarray(m1)
    m2 = np.asarray(m2)

    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))

    mask = M1 * M2
    match = mask.nonzero()
    match = list(zip(match[0], match[1]))
    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))

    return match, unmatched_O, unmatched_Q


def _indices_to_matches(cost_matrix, indices, thresh):
    matched_cost = cost_matrix[tuple(zip(*indices))]
    matched_mask = (matched_cost <= thresh)

    matches = indices[matched_mask]
    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))

    return matches, unmatched_a, unmatched_b


def linear_assignment(cost_matrix, thresh):
    if cost_matrix.size == 0:
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
    matches, unmatched_a, unmatched_b = [], [], []
    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    for ix, mx in enumerate(x):
        if mx >= 0:
            matches.append([ix, mx])
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    matches = np.asarray(matches)
    return matches, unmatched_a, unmatched_b
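

# Usage sketch (illustrative): lap.lapjv solves the rectangular assignment
# problem with a cost limit, so pairs whose cost exceeds `thresh` stay
# unmatched.
#
#     cost = np.array([[0.2, 0.9],
#                      [0.8, 0.3]])
#     matches, u_a, u_b = linear_assignment(cost, thresh=0.5)
#     # matches -> [[0, 0], [1, 1]]; with thresh=0.25 only [0, 0] survives
#     # and row 1 / column 1 are returned as unmatched.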
|
||||
|
||||
|
||||
def ious(atlbrs, btlbrs):
|
||||
"""
|
||||
Compute cost based on IoU
|
||||
:type atlbrs: list[tlbr] | np.ndarray
|
||||
:type atlbrs: list[tlbr] | np.ndarray
|
||||
|
||||
:rtype ious np.ndarray
|
||||
"""
|
||||
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
|
||||
if ious.size == 0:
|
||||
return ious
|
||||
|
||||
ious = bbox_ious(
|
||||
np.ascontiguousarray(atlbrs, dtype=np.float),
|
||||
np.ascontiguousarray(btlbrs, dtype=np.float)
|
||||
)
|
||||
|
||||
return ious
|
||||
|
||||
|
||||
def iou_distance(atracks, btracks):
|
||||
"""
|
||||
Compute cost based on IoU
|
||||
:type atracks: list[STrack]
|
||||
:type btracks: list[STrack]
|
||||
|
||||
:rtype cost_matrix np.ndarray
|
||||
"""
|
||||
|
||||
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
|
||||
atlbrs = atracks
|
||||
btlbrs = btracks
|
||||
else:
|
||||
atlbrs = [track.tlbr for track in atracks]
|
||||
btlbrs = [track.tlbr for track in btracks]
|
||||
_ious = ious(atlbrs, btlbrs)
|
||||
cost_matrix = 1 - _ious
|
||||
|
||||
return cost_matrix
|
||||
|
||||
def v_iou_distance(atracks, btracks):
|
||||
"""
|
||||
Compute cost based on IoU
|
||||
:type atracks: list[STrack]
|
||||
:type btracks: list[STrack]
|
||||
|
||||
:rtype cost_matrix np.ndarray
|
||||
"""
|
||||
|
||||
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
|
||||
atlbrs = atracks
|
||||
btlbrs = btracks
|
||||
else:
|
||||
atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
|
||||
btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
|
||||
_ious = ious(atlbrs, btlbrs)
|
||||
cost_matrix = 1 - _ious
|
||||
|
||||
return cost_matrix
|
||||
|
||||
def embedding_distance(tracks, detections, metric='cosine'):
|
||||
"""
|
||||
:param tracks: list[STrack]
|
||||
:param detections: list[BaseTrack]
|
||||
:param metric:
|
||||
:return: cost_matrix np.ndarray
|
||||
"""
|
||||
|
||||
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
|
||||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
|
||||
#for i, track in enumerate(tracks):
|
||||
#cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
|
||||
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
|
||||
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features
|
||||
return cost_matrix
|
||||
|
||||
|
||||
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
|
||||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
gating_dim = 2 if only_position else 4
|
||||
gating_threshold = chi2inv95[gating_dim]
|
||||
measurements = np.asarray([det.to_xyah() for det in detections])
|
||||
for row, track in enumerate(tracks):
|
||||
gating_distance = kf.gating_distance(
|
||||
track.mean, track.covariance, measurements, only_position, metric='maha')
|
||||
cost_matrix[row, gating_distance > gating_threshold] = np.inf
|
||||
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
|
||||
return cost_matrix
|
||||
|
||||
|
||||
def fuse_iou(cost_matrix, tracks, detections):
|
||||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
reid_sim = 1 - cost_matrix
|
||||
iou_dist = iou_distance(tracks, detections)
|
||||
iou_sim = 1 - iou_dist
|
||||
fuse_sim = reid_sim * (1 + iou_sim) / 2
|
||||
det_scores = np.array([det.score for det in detections])
|
||||
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
|
||||
#fuse_sim = fuse_sim * (1 + det_scores) / 2
|
||||
fuse_cost = 1 - fuse_sim
|
||||
return fuse_cost
|
||||
|
||||
|
||||
def fuse_score(cost_matrix, detections):
|
||||
if cost_matrix.size == 0:
|
||||
return cost_matrix
|
||||
iou_sim = 1 - cost_matrix
|
||||
det_scores = np.array([det.score for det in detections])
|
||||
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
|
||||
fuse_sim = iou_sim * det_scores
|
||||
fuse_cost = 1 - fuse_sim
|
||||
return fuse_cost
|
||||
|
||||
|
||||
def greedy_assignment_iou(dist, thresh):
|
||||
matched_indices = []
|
||||
if dist.shape[1] == 0:
|
||||
return np.array(matched_indices, np.int32).reshape(-1, 2)
|
||||
for i in range(dist.shape[0]):
|
||||
j = dist[i].argmin()
|
||||
if dist[i][j] < thresh:
|
||||
dist[:, j] = 1.
|
||||
matched_indices.append([j, i])
|
||||
return np.array(matched_indices, np.int32).reshape(-1, 2)
|
||||
|
||||
def greedy_assignment(dists, threshs):
|
||||
matches = greedy_assignment_iou(dists.T, threshs)
|
||||
u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
|
||||
u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
|
||||
return matches, u_track, u_det


def fuse_score_matrix(cost_matrix, detections, tracks):
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix

    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    trk_scores = np.array([trk.score for trk in tracks])
    trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
    mid_scores = (det_scores + trk_scores) / 2
    fuse_sim = iou_sim * mid_scores
    fuse_cost = 1 - fuse_sim

    return fuse_cost


"""
calculate buffered IoU, used in C_BIoU_Tracker
"""
def buffered_iou_distance(atracks, btracks, level=1):
    """
    atracks: list[C_BIoUSTrack], tracks
    btracks: list[C_BIoUSTrack], detections
    level: cascade level, 1 or 2
    """
    assert level in [1, 2], 'level must be 1 or 2'
    if level == 1:  # use motion_state1 (tracks) and buffer_bbox1 (detections) to calculate
        atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
        btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
    else:
        atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
        btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
    _ious = ious(atlbrs, btlbrs)

    cost_matrix = 1 - _ious
    return cost_matrix


"""
observation-centric association, with velocity, for OC-SORT
"""
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):

    if len(tracklets) == 0:
        return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))

    # get numpy format bboxes
    trk_tlbrs = np.array([track.tlbr for track in tracklets])
    det_tlbrs = np.array([det.tlbr for det in detections])
    det_scores = np.array([det.score for det in detections])

    iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)

    Y, X = speed_direction_batch(det_tlbrs, previous_obs)
    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
    diff_angle_cos = inertia_X * X + inertia_Y * Y
    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
    diff_angle = np.arccos(diff_angle_cos)
    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi

    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0

    scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
    angle_diff_cost = angle_diff_cost * scores.T

    matches, unmatched_a, unmatched_b = linear_assignment(-(iou_matrix + angle_diff_cost), thresh=0.9)

    return matches, unmatched_a, unmatched_b


"""
helper func of observation_centric_association
"""
def speed_direction_batch(dets, tracks):
    tracks = tracks[..., np.newaxis]
    CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
    CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
    dx = CX2 - CX1
    dy = CY2 - CY1
    norm = np.sqrt(dx**2 + dy**2) + 1e-6
    dx = dx / norm
    dy = dy / norm
    return dy, dx  # size: num_track x num_det
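
# Illustrative sketch: speed_direction_batch returns unit directions from each
# detection center toward each previous observation center.
def _demo_speed_direction():
    dets = np.array([[2., 3., 4., 5.]])        # tlbr, center (3, 4)
    prev = np.array([[-1., -1., 1., 1., 1.]])  # tlbr + score, center (0, 0)
    dy, dx = speed_direction_batch(dets, prev)
    # dx ~ [[-0.6]], dy ~ [[-0.8]]: pointing from (3, 4) back toward (0, 0)
    return dy, dx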


def matching_cascade(
        distance_metric, matching_thresh, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None):
    """
    Run the matching cascade of DeepSORT

    distance_metric: function that calculates the cost matrix
    matching_thresh: float, associations with cost larger than this value are disregarded
    cascade_depth: int, equal to the max_age of a tracklet
    tracks: List[STrack], current tracks
    detections: List[STrack], current detections
    track_indices: List[int], tracks that will be calculated, default None
    detection_indices: List[int], detections that will be calculated, default None

    return:
        matched pairs (track_idx, det_idx), unmatched tracks, unmatched detections
    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    detections_to_match = detection_indices
    matches = []

    for level in range(cascade_depth):
        """
        match newer tracks with detections first
        """
        if not len(detections_to_match):  # no detections left
            break

        track_indices_l = [
            k for k in track_indices
            if tracks[k].time_since_update == 1 + level
        ]  # filter tracks whose age equals level + 1 (the newest track has age 1)

        if not len(track_indices_l):  # nothing to match at this level
            continue

        # tracks and detections to be matched in the current level
        track_l = [tracks[idx] for idx in track_indices_l]  # List[STrack]
        det_l = [detections[idx] for idx in detections_to_match]  # List[STrack]

        # calculate the cost matrix
        cost_matrix = distance_metric(track_l, det_l)

        # solve the linear assignment problem
        matched_row_col, unmatched_row, unmatched_col = \
            linear_assignment(cost_matrix, matching_thresh)

        for row, col in matched_row_col:  # for those that matched
            matches.append((track_indices_l[row], detections_to_match[col]))

        unmatched_detection_l = []  # current detections not matched
        for col in unmatched_col:  # for detections not matched
            unmatched_detection_l.append(detections_to_match[col])

        detections_to_match = unmatched_detection_l  # update detections to match for the next level
    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))

    return matches, unmatched_tracks, detections_to_match
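
# Illustrative usage sketch: run the cascade with IoU distance as the metric
# (assumes each track exposes `time_since_update`, as DeepSORT's tracks do):
def _demo_matching_cascade(tracks, detections):
    return matching_cascade(
        distance_metric=iou_distance,
        matching_thresh=0.7,
        cascade_depth=30,  # typically the tracker's max_age
        tracks=tracks,
        detections=detections)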


def nearest_embedding_distance(tracks, detections, metric='cosine'):
    """
    Different from embedding_distance, this func calculates the
    nearest distance among all track history features and detections

    tracks: list[STrack]
    detections: list[STrack]
    metric: str, cosine or euclidean
    TODO: support euclidean distance

    return:
        cost_matrix, np.ndarray, shape (len(tracks), len(detections))
    """
    cost_matrix = np.zeros((len(tracks), len(detections)))
    det_features = np.asarray([det.features[-1] for det in detections])

    for row, track in enumerate(tracks):
        track_history_features = np.asarray(track.features)
        dist = 1. - cal_cosine_distance(track_history_features, det_features)
        dist = dist.min(axis=0)
        cost_matrix[row, :] = dist

    return cost_matrix


def cal_cosine_distance(mat1, mat2):
    """
    Simple func to calculate the cosine similarity between 2 matrices
    (the caller turns it into a distance via `1. - ...`)

    :param mat1: np.ndarray, shape (M, dim)
    :param mat2: np.ndarray, shape (N, dim)
    :return: np.ndarray, shape (M, N)
    """
    # result = mat1 . mat2^T / (|mat1| * |mat2|)
    # normalize mat1 and mat2
    mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
    mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)

    return np.dot(mat1, mat2.T)
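
# Quick sanity sketch: identical directions give similarity 1 and orthogonal
# directions give 0, so the `1. - cal_cosine_distance(...)` used above is a
# proper cosine distance.
def _demo_cal_cosine_distance():
    a = np.array([[1.0, 0.0], [0.0, 2.0]])
    b = np.array([[3.0, 0.0]])
    sim = cal_cosine_distance(a, b)
    # sim -> [[1.0], [0.0]]
    return sim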
|
||||
237
test/yolov7-tracker/tracker/trackers/ocsort_tracker.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
OC Sort
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
from .basetrack import BaseTrack, TrackState
|
||||
from .tracklet import Tracklet, Tracklet_w_velocity
|
||||
from .matching import *
|
||||
|
||||
from cython_bbox import bbox_overlaps as bbox_ious
|
||||
|
||||
class OCSortTracker(object):
|
||||
def __init__(self, args, frame_rate=30):
|
||||
self.tracked_tracklets = [] # type: list[Tracklet]
|
||||
self.lost_tracklets = [] # type: list[Tracklet]
|
||||
self.removed_tracklets = [] # type: list[Tracklet]
|
||||
|
||||
self.frame_id = 0
|
||||
self.args = args
|
||||
|
||||
self.det_thresh = args.conf_thresh + 0.1
|
||||
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
|
||||
self.max_time_lost = self.buffer_size
|
||||
|
||||
self.motion = args.kalman_format
|
||||
|
||||
self.delta_t = 3
|
||||
|
||||
@staticmethod
|
||||
def k_previous_obs(observations, cur_age, k):
|
||||
if len(observations) == 0:
|
||||
return [-1, -1, -1, -1, -1]
|
||||
for i in range(k):
|
||||
dt = k - i
|
||||
if cur_age - dt in observations:
|
||||
return observations[cur_age-dt]
|
||||
max_age = max(observations.keys())
|
||||
return observations[max_age]
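
# Illustrative sketch of k_previous_obs with delta_t = 3 (self.delta_t above):
#   obs = {5: [0, 0, 10, 10, 0.9], 7: [2, 2, 12, 12, 0.8]}   # keyed by track age
#   OCSortTracker.k_previous_obs(obs, cur_age=8, k=3)   # -> obs[5]
#   OCSortTracker.k_previous_obs(obs, cur_age=12, k=3)  # -> obs[7] (fallback to newest)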
|
||||
|
||||
def update(self, output_results, img, ori_img):
|
||||
"""
|
||||
output_results: processed detections (scale to original size) tlbr format
|
||||
"""
|
||||
|
||||
self.frame_id += 1
|
||||
activated_tracklets = []
|
||||
refind_tracklets = []
|
||||
lost_tracklets = []
|
||||
removed_tracklets = []
|
||||
|
||||
scores = output_results[:, 4]
|
||||
bboxes = output_results[:, :4]
|
||||
categories = output_results[:, -1]
|
||||
|
||||
remain_inds = scores > self.args.conf_thresh
|
||||
inds_low = scores > 0.1
|
||||
inds_high = scores < self.args.conf_thresh
|
||||
|
||||
inds_second = np.logical_and(inds_low, inds_high)
|
||||
dets_second = bboxes[inds_second]
|
||||
dets = bboxes[remain_inds]
|
||||
|
||||
cates = categories[remain_inds]
|
||||
cates_second = categories[inds_second]
|
||||
|
||||
scores_keep = scores[remain_inds]
|
||||
scores_second = scores[inds_second]
|
||||
|
||||
if len(dets) > 0:
|
||||
'''Detections'''
|
||||
detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
|
||||
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
|
||||
else:
|
||||
detections = []
|
||||
|
||||
''' Add newly detected tracklets to tracked_tracklets'''
|
||||
unconfirmed = []
|
||||
tracked_tracklets = [] # type: list[Tracklet]
|
||||
for track in self.tracked_tracklets:
|
||||
if not track.is_activated:
|
||||
unconfirmed.append(track)
|
||||
else:
|
||||
tracked_tracklets.append(track)
|
||||
|
||||
''' Step 2: First association, Observation Centric Momentum'''
|
||||
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
|
||||
|
||||
velocities = np.array(
|
||||
[trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])
|
||||
|
||||
# last observation, observation-centric
|
||||
# last_boxes = np.array([trk.last_observation for trk in tracklet_pool])
|
||||
|
||||
# historical observations
|
||||
k_observations = np.array(
|
||||
[self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])
|
||||
|
||||
|
||||
# Predict the current location with Kalman
|
||||
for tracklet in tracklet_pool:
|
||||
tracklet.predict()
|
||||
|
||||
# Observation centric cost matrix and assignment
|
||||
matches, u_track, u_detection = observation_centric_association(
|
||||
tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
|
||||
velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
|
||||
)
|
||||
|
||||
for itracked, idet in matches:
|
||||
track = tracklet_pool[itracked]
|
||||
det = detections[idet]
|
||||
if track.state == TrackState.Tracked:
|
||||
track.update(detections[idet], self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
else:
|
||||
track.re_activate(det, self.frame_id, new_id=False)
|
||||
refind_tracklets.append(track)
|
||||
|
||||
''' Step 3: Second association, with low score detection boxes'''
|
||||
# association the untrack to the low score detections
|
||||
if len(dets_second) > 0:
|
||||
'''Detections'''
|
||||
detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
|
||||
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
|
||||
else:
|
||||
detections_second = []
|
||||
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
|
||||
|
||||
# for unmatched tracks in the first round, use the last observation
|
||||
r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
|
||||
detections_second_bbox = [det.tlbr for det in detections_second]
|
||||
|
||||
dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)
|
||||
|
||||
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
|
||||
for itracked, idet in matches:
|
||||
track = r_tracked_tracklets[itracked]
|
||||
det = detections_second[idet]
|
||||
if track.state == TrackState.Tracked:
|
||||
track.update(det, self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
else:
|
||||
track.re_activate(det, self.frame_id, new_id=False)
|
||||
refind_tracklets.append(track)
|
||||
|
||||
for it in u_track:
|
||||
track = r_tracked_tracklets[it]
|
||||
if not track.state == TrackState.Lost:
|
||||
track.mark_lost()
|
||||
lost_tracklets.append(track)
|
||||
|
||||
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
|
||||
detections = [detections[i] for i in u_detection]
|
||||
dists = iou_distance(unconfirmed, detections)
|
||||
|
||||
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
|
||||
|
||||
for itracked, idet in matches:
|
||||
unconfirmed[itracked].update(detections[idet], self.frame_id)
|
||||
activated_tracklets.append(unconfirmed[itracked])
|
||||
for it in u_unconfirmed:
|
||||
track = unconfirmed[it]
|
||||
track.mark_removed()
|
||||
removed_tracklets.append(track)
|
||||
|
||||
""" Step 4: Init new tracklets"""
|
||||
for inew in u_detection:
|
||||
track = detections[inew]
|
||||
if track.score < self.det_thresh:
|
||||
continue
|
||||
track.activate(self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
|
||||
""" Step 5: Update state"""
|
||||
for track in self.lost_tracklets:
|
||||
if self.frame_id - track.end_frame > self.max_time_lost:
|
||||
track.mark_removed()
|
||||
removed_tracklets.append(track)
|
||||
|
||||
# print('Remained match {} s'.format(t4-t3))
|
||||
|
||||
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
|
||||
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
|
||||
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
|
||||
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
|
||||
self.lost_tracklets.extend(lost_tracklets)
|
||||
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
|
||||
self.removed_tracklets.extend(removed_tracklets)
|
||||
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
|
||||
# get scores of lost tracks
|
||||
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
|
||||
|
||||
return output_tracklets
|
||||
|
||||
|
||||
|
||||
|
||||
def joint_tracklets(tlista, tlistb):
|
||||
exists = {}
|
||||
res = []
|
||||
for t in tlista:
|
||||
exists[t.track_id] = 1
|
||||
res.append(t)
|
||||
for t in tlistb:
|
||||
tid = t.track_id
|
||||
if not exists.get(tid, 0):
|
||||
exists[tid] = 1
|
||||
res.append(t)
|
||||
return res
|
||||
|
||||
|
||||
def sub_tracklets(tlista, tlistb):
|
||||
tracklets = {}
|
||||
for t in tlista:
|
||||
tracklets[t.track_id] = t
|
||||
for t in tlistb:
|
||||
tid = t.track_id
|
||||
if tracklets.get(tid, 0):
|
||||
del tracklets[tid]
|
||||
return list(tracklets.values())
|
||||
|
||||
|
||||
def remove_duplicate_tracklets(trackletsa, trackletsb):
|
||||
pdist = iou_distance(trackletsa, trackletsb)
|
||||
pairs = np.where(pdist < 0.15)
|
||||
dupa, dupb = list(), list()
|
||||
for p, q in zip(*pairs):
|
||||
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
|
||||
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
|
||||
if timep > timeq:
|
||||
dupb.append(q)
|
||||
else:
|
||||
dupa.append(p)
|
||||
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
|
||||
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
|
||||
return resa, resb
|
||||
98
test/yolov7-tracker/tracker/trackers/reid_models/AFLink.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
AFLink code in StrongSORT(StrongSORT: Make DeepSORT Great Again(arxiv))
|
||||
|
||||
copied from origin repo
|
||||
"""
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
import cv2
|
||||
import logging
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
class TemporalBlock(nn.Module):
|
||||
def __init__(self, cin, cout):
|
||||
super(TemporalBlock, self).__init__()
|
||||
self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.bnf = nn.BatchNorm1d(cout)
|
||||
self.bnx = nn.BatchNorm1d(cout)
|
||||
self.bny = nn.BatchNorm1d(cout)
|
||||
|
||||
def bn(self, x):
|
||||
x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
|
||||
x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
|
||||
x[:, :, :, 2] = self.bny(x[:, :, :, 2])
|
||||
return x
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class FusionBlock(nn.Module):
|
||||
def __init__(self, cin, cout):
|
||||
super(FusionBlock, self).__init__()
|
||||
self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
|
||||
self.bn = nn.BatchNorm2d(cout)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class Classifier(nn.Module):
|
||||
def __init__(self, cin):
|
||||
super(Classifier, self).__init__()
|
||||
self.fc1 = nn.Linear(cin*2, cin//2)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.fc2 = nn.Linear(cin//2, 2)
|
||||
|
||||
def forward(self, x1, x2):
|
||||
x = torch.cat((x1, x2), dim=1)
|
||||
x = self.fc1(x)
|
||||
x = self.relu(x)
|
||||
x = self.fc2(x)
|
||||
return x
|
||||
|
||||
|
||||
class PostLinker(nn.Module):
|
||||
def __init__(self):
|
||||
super(PostLinker, self).__init__()
|
||||
self.TemporalModule_1 = nn.Sequential(
|
||||
TemporalBlock(1, 32),
|
||||
TemporalBlock(32, 64),
|
||||
TemporalBlock(64, 128),
|
||||
TemporalBlock(128, 256)
|
||||
)
|
||||
self.TemporalModule_2 = nn.Sequential(
|
||||
TemporalBlock(1, 32),
|
||||
TemporalBlock(32, 64),
|
||||
TemporalBlock(64, 128),
|
||||
TemporalBlock(128, 256)
|
||||
)
|
||||
self.FusionBlock_1 = FusionBlock(256, 256)
|
||||
self.FusionBlock_2 = FusionBlock(256, 256)
|
||||
self.pooling = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.classifier = Classifier(256)
|
||||
|
||||
def forward(self, x1, x2):
|
||||
x1 = x1[:, :, :, :3]
|
||||
x2 = x2[:, :, :, :3]
|
||||
x1 = self.TemporalModule_1(x1) # [B,1,30,3] -> [B,256,6,3]
|
||||
x2 = self.TemporalModule_2(x2)
|
||||
x1 = self.FusionBlock_1(x1)
|
||||
x2 = self.FusionBlock_2(x2)
|
||||
x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
|
||||
x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
|
||||
y = self.classifier(x1, x2)
|
||||
if not self.training:
|
||||
y = torch.softmax(y, dim=1)
|
||||
return y
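
# Illustrative smoke-test sketch (assumes the 30-frame, 3-channel trajectory
# inputs used by StrongSORT's AFLink):
def _demo_postlinker():
    model = PostLinker().eval()
    t1 = torch.zeros(2, 1, 30, 3)  # (batch, 1, frames, [frame_id, x, y])
    t2 = torch.zeros(2, 1, 30, 3)
    probs = model(t1, t2)          # (2, 2); softmax applied when not training
    return probs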
|
||||
598
test/yolov7-tracker/tracker/trackers/reid_models/OSNet.py
Normal file
@@ -0,0 +1,598 @@
|
||||
from __future__ import division, absolute_import
|
||||
import warnings
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
__all__ = [
|
||||
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
|
||||
]
|
||||
|
||||
pretrained_urls = {
|
||||
'osnet_x1_0':
|
||||
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
|
||||
'osnet_x0_75':
|
||||
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
|
||||
'osnet_x0_5':
|
||||
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
|
||||
'osnet_x0_25':
|
||||
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
|
||||
'osnet_ibn_x1_0':
|
||||
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
|
||||
}
|
||||
|
||||
|
||||
##########
|
||||
# Basic layers
|
||||
##########
|
||||
class ConvLayer(nn.Module):
|
||||
"""Convolution layer (conv + bn + relu)."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
IN=False
|
||||
):
|
||||
super(ConvLayer, self).__init__()
|
||||
self.conv = nn.Conv2d(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
bias=False,
|
||||
groups=groups
|
||||
)
|
||||
if IN:
|
||||
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
|
||||
else:
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class Conv1x1(nn.Module):
|
||||
"""1x1 convolution + bn + relu."""
|
||||
|
||||
def __init__(self, in_channels, out_channels, stride=1, groups=1):
|
||||
super(Conv1x1, self).__init__()
|
||||
self.conv = nn.Conv2d(
|
||||
in_channels,
|
||||
out_channels,
|
||||
1,
|
||||
stride=stride,
|
||||
padding=0,
|
||||
bias=False,
|
||||
groups=groups
|
||||
)
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class Conv1x1Linear(nn.Module):
|
||||
"""1x1 convolution + bn (w/o non-linearity)."""
|
||||
|
||||
def __init__(self, in_channels, out_channels, stride=1):
|
||||
super(Conv1x1Linear, self).__init__()
|
||||
self.conv = nn.Conv2d(
|
||||
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
|
||||
)
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
return x
|
||||
|
||||
|
||||
class Conv3x3(nn.Module):
|
||||
"""3x3 convolution + bn + relu."""
|
||||
|
||||
def __init__(self, in_channels, out_channels, stride=1, groups=1):
|
||||
super(Conv3x3, self).__init__()
|
||||
self.conv = nn.Conv2d(
|
||||
in_channels,
|
||||
out_channels,
|
||||
3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
bias=False,
|
||||
groups=groups
|
||||
)
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class LightConv3x3(nn.Module):
|
||||
"""Lightweight 3x3 convolution.
|
||||
|
||||
1x1 (linear) + dw 3x3 (nonlinear).
|
||||
"""
|
||||
|
||||
def __init__(self, in_channels, out_channels):
|
||||
super(LightConv3x3, self).__init__()
|
||||
self.conv1 = nn.Conv2d(
|
||||
in_channels, out_channels, 1, stride=1, padding=0, bias=False
|
||||
)
|
||||
self.conv2 = nn.Conv2d(
|
||||
out_channels,
|
||||
out_channels,
|
||||
3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias=False,
|
||||
groups=out_channels
|
||||
)
|
||||
self.bn = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
##########
|
||||
# Building blocks for omni-scale feature learning
|
||||
##########
|
||||
class ChannelGate(nn.Module):
|
||||
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels,
|
||||
num_gates=None,
|
||||
return_gates=False,
|
||||
gate_activation='sigmoid',
|
||||
reduction=16,
|
||||
layer_norm=False
|
||||
):
|
||||
super(ChannelGate, self).__init__()
|
||||
if num_gates is None:
|
||||
num_gates = in_channels
|
||||
self.return_gates = return_gates
|
||||
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
|
||||
self.fc1 = nn.Conv2d(
|
||||
in_channels,
|
||||
in_channels // reduction,
|
||||
kernel_size=1,
|
||||
bias=True,
|
||||
padding=0
|
||||
)
|
||||
self.norm1 = None
|
||||
if layer_norm:
|
||||
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.fc2 = nn.Conv2d(
|
||||
in_channels // reduction,
|
||||
num_gates,
|
||||
kernel_size=1,
|
||||
bias=True,
|
||||
padding=0
|
||||
)
|
||||
if gate_activation == 'sigmoid':
|
||||
self.gate_activation = nn.Sigmoid()
|
||||
elif gate_activation == 'relu':
|
||||
self.gate_activation = nn.ReLU(inplace=True)
|
||||
elif gate_activation == 'linear':
|
||||
self.gate_activation = None
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"Unknown gate activation: {}".format(gate_activation)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
input = x
|
||||
x = self.global_avgpool(x)
|
||||
x = self.fc1(x)
|
||||
if self.norm1 is not None:
|
||||
x = self.norm1(x)
|
||||
x = self.relu(x)
|
||||
x = self.fc2(x)
|
||||
if self.gate_activation is not None:
|
||||
x = self.gate_activation(x)
|
||||
if self.return_gates:
|
||||
return x
|
||||
return input * x
|
||||
|
||||
|
||||
class OSBlock(nn.Module):
|
||||
"""Omni-scale feature learning block."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
IN=False,
|
||||
bottleneck_reduction=4,
|
||||
**kwargs
|
||||
):
|
||||
super(OSBlock, self).__init__()
|
||||
mid_channels = out_channels // bottleneck_reduction
|
||||
self.conv1 = Conv1x1(in_channels, mid_channels)
|
||||
self.conv2a = LightConv3x3(mid_channels, mid_channels)
|
||||
self.conv2b = nn.Sequential(
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
)
|
||||
self.conv2c = nn.Sequential(
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
)
|
||||
self.conv2d = nn.Sequential(
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
LightConv3x3(mid_channels, mid_channels),
|
||||
)
|
||||
self.gate = ChannelGate(mid_channels)
|
||||
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
|
||||
self.downsample = None
|
||||
if in_channels != out_channels:
|
||||
self.downsample = Conv1x1Linear(in_channels, out_channels)
|
||||
self.IN = None
|
||||
if IN:
|
||||
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x1 = self.conv1(x)
|
||||
x2a = self.conv2a(x1)
|
||||
x2b = self.conv2b(x1)
|
||||
x2c = self.conv2c(x1)
|
||||
x2d = self.conv2d(x1)
|
||||
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
|
||||
x3 = self.conv3(x2)
|
||||
if self.downsample is not None:
|
||||
identity = self.downsample(identity)
|
||||
out = x3 + identity
|
||||
if self.IN is not None:
|
||||
out = self.IN(out)
|
||||
return F.relu(out)
|
||||
|
||||
|
||||
##########
|
||||
# Network architecture
|
||||
##########
|
||||
class OSNet(nn.Module):
|
||||
"""Omni-Scale Network.
|
||||
|
||||
Reference:
|
||||
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
|
||||
- Zhou et al. Learning Generalisable Omni-Scale Representations
|
||||
for Person Re-Identification. TPAMI, 2021.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
num_classes,
|
||||
blocks,
|
||||
layers,
|
||||
channels,
|
||||
feature_dim=512,
|
||||
loss='softmax',
|
||||
IN=False,
|
||||
**kwargs
|
||||
):
|
||||
super(OSNet, self).__init__()
|
||||
num_blocks = len(blocks)
|
||||
assert num_blocks == len(layers)
|
||||
assert num_blocks == len(channels) - 1
|
||||
self.loss = loss
|
||||
self.feature_dim = feature_dim
|
||||
|
||||
# convolutional backbone
|
||||
self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
|
||||
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
|
||||
self.conv2 = self._make_layer(
|
||||
blocks[0],
|
||||
layers[0],
|
||||
channels[0],
|
||||
channels[1],
|
||||
reduce_spatial_size=True,
|
||||
IN=IN
|
||||
)
|
||||
self.conv3 = self._make_layer(
|
||||
blocks[1],
|
||||
layers[1],
|
||||
channels[1],
|
||||
channels[2],
|
||||
reduce_spatial_size=True
|
||||
)
|
||||
self.conv4 = self._make_layer(
|
||||
blocks[2],
|
||||
layers[2],
|
||||
channels[2],
|
||||
channels[3],
|
||||
reduce_spatial_size=False
|
||||
)
|
||||
self.conv5 = Conv1x1(channels[3], channels[3])
|
||||
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
|
||||
# fully connected layer
|
||||
self.fc = self._construct_fc_layer(
|
||||
self.feature_dim, channels[3], dropout_p=None
|
||||
)
|
||||
# identity classification layer
|
||||
self.classifier = nn.Linear(self.feature_dim, num_classes)
|
||||
|
||||
self._init_params()
|
||||
|
||||
def _make_layer(
|
||||
self,
|
||||
block,
|
||||
layer,
|
||||
in_channels,
|
||||
out_channels,
|
||||
reduce_spatial_size,
|
||||
IN=False
|
||||
):
|
||||
layers = []
|
||||
|
||||
layers.append(block(in_channels, out_channels, IN=IN))
|
||||
for i in range(1, layer):
|
||||
layers.append(block(out_channels, out_channels, IN=IN))
|
||||
|
||||
if reduce_spatial_size:
|
||||
layers.append(
|
||||
nn.Sequential(
|
||||
Conv1x1(out_channels, out_channels),
|
||||
nn.AvgPool2d(2, stride=2)
|
||||
)
|
||||
)
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
|
||||
if fc_dims is None or fc_dims < 0:
|
||||
self.feature_dim = input_dim
|
||||
return None
|
||||
|
||||
if isinstance(fc_dims, int):
|
||||
fc_dims = [fc_dims]
|
||||
|
||||
layers = []
|
||||
for dim in fc_dims:
|
||||
layers.append(nn.Linear(input_dim, dim))
|
||||
layers.append(nn.BatchNorm1d(dim))
|
||||
layers.append(nn.ReLU(inplace=True))
|
||||
if dropout_p is not None:
|
||||
layers.append(nn.Dropout(p=dropout_p))
|
||||
input_dim = dim
|
||||
|
||||
self.feature_dim = fc_dims[-1]
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _init_params(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(
|
||||
m.weight, mode='fan_out', nonlinearity='relu'
|
||||
)
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
elif isinstance(m, nn.BatchNorm1d):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
elif isinstance(m, nn.Linear):
|
||||
nn.init.normal_(m.weight, 0, 0.01)
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def featuremaps(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.conv2(x)
|
||||
x = self.conv3(x)
|
||||
x = self.conv4(x)
|
||||
x = self.conv5(x)
|
||||
return x
|
||||
|
||||
def forward(self, x, return_featuremaps=False):
|
||||
x = self.featuremaps(x)
|
||||
if return_featuremaps:
|
||||
return x
|
||||
v = self.global_avgpool(x)
|
||||
v = v.view(v.size(0), -1)
|
||||
if self.fc is not None:
|
||||
v = self.fc(v)
|
||||
if not self.training:
|
||||
return v
|
||||
y = self.classifier(v)
|
||||
if self.loss == 'softmax':
|
||||
return y
|
||||
elif self.loss == 'triplet':
|
||||
return y, v
|
||||
else:
|
||||
raise KeyError("Unsupported loss: {}".format(self.loss))
|
||||
|
||||
|
||||
def init_pretrained_weights(model, key=''):
|
||||
"""Initializes model with pretrained weights.
|
||||
|
||||
Layers that don't match with pretrained layers in name or size are kept unchanged.
|
||||
"""
|
||||
import os
|
||||
import errno
|
||||
import gdown
|
||||
from collections import OrderedDict
|
||||
|
||||
def _get_torch_home():
|
||||
ENV_TORCH_HOME = 'TORCH_HOME'
|
||||
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
|
||||
DEFAULT_CACHE_DIR = '~/.cache'
|
||||
torch_home = os.path.expanduser(
|
||||
os.getenv(
|
||||
ENV_TORCH_HOME,
|
||||
os.path.join(
|
||||
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
|
||||
)
|
||||
)
|
||||
)
|
||||
return torch_home
|
||||
|
||||
torch_home = _get_torch_home()
|
||||
model_dir = os.path.join(torch_home, 'checkpoints')
|
||||
try:
|
||||
os.makedirs(model_dir)
|
||||
except OSError as e:
|
||||
if e.errno == errno.EEXIST:
|
||||
# Directory already exists, ignore.
|
||||
pass
|
||||
else:
|
||||
# Unexpected OSError, re-raise.
|
||||
raise
|
||||
filename = key + '_imagenet.pth'
|
||||
cached_file = os.path.join(model_dir, filename)
|
||||
|
||||
if not os.path.exists(cached_file):
|
||||
gdown.download(pretrained_urls[key], cached_file, quiet=False)
|
||||
|
||||
state_dict = torch.load(cached_file)
|
||||
model_dict = model.state_dict()
|
||||
new_state_dict = OrderedDict()
|
||||
matched_layers, discarded_layers = [], []
|
||||
|
||||
for k, v in state_dict.items():
|
||||
if k.startswith('module.'):
|
||||
k = k[7:] # discard module.
|
||||
|
||||
if k in model_dict and model_dict[k].size() == v.size():
|
||||
new_state_dict[k] = v
|
||||
matched_layers.append(k)
|
||||
else:
|
||||
discarded_layers.append(k)
|
||||
|
||||
model_dict.update(new_state_dict)
|
||||
model.load_state_dict(model_dict)
|
||||
|
||||
if len(matched_layers) == 0:
|
||||
warnings.warn(
|
||||
'The pretrained weights from "{}" cannot be loaded, '
|
||||
'please check the key names manually '
|
||||
'(** ignored and continue **)'.format(cached_file)
|
||||
)
|
||||
else:
|
||||
print(
|
||||
'Successfully loaded imagenet pretrained weights from "{}"'.
|
||||
format(cached_file)
|
||||
)
|
||||
if len(discarded_layers) > 0:
|
||||
print(
|
||||
'** The following layers are discarded '
|
||||
'due to unmatched keys or layer size: {}'.
|
||||
format(discarded_layers)
|
||||
)
|
||||
|
||||
|
||||
##########
|
||||
# Instantiation
|
||||
##########
|
||||
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
|
||||
# standard size (width x1.0)
|
||||
model = OSNet(
|
||||
num_classes,
|
||||
blocks=[OSBlock, OSBlock, OSBlock],
|
||||
layers=[2, 2, 2],
|
||||
channels=[64, 256, 384, 512],
|
||||
loss=loss,
|
||||
**kwargs
|
||||
)
|
||||
if pretrained:
|
||||
init_pretrained_weights(model, key='osnet_x1_0')
|
||||
return model
|
||||
|
||||
|
||||
def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
|
||||
# medium size (width x0.75)
|
||||
model = OSNet(
|
||||
num_classes,
|
||||
blocks=[OSBlock, OSBlock, OSBlock],
|
||||
layers=[2, 2, 2],
|
||||
channels=[48, 192, 288, 384],
|
||||
loss=loss,
|
||||
**kwargs
|
||||
)
|
||||
if pretrained:
|
||||
init_pretrained_weights(model, key='osnet_x0_75')
|
||||
return model
|
||||
|
||||
|
||||
def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
|
||||
# tiny size (width x0.5)
|
||||
model = OSNet(
|
||||
num_classes,
|
||||
blocks=[OSBlock, OSBlock, OSBlock],
|
||||
layers=[2, 2, 2],
|
||||
channels=[32, 128, 192, 256],
|
||||
loss=loss,
|
||||
**kwargs
|
||||
)
|
||||
if pretrained:
|
||||
init_pretrained_weights(model, key='osnet_x0_5')
|
||||
return model
|
||||
|
||||
|
||||
def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
|
||||
# very tiny size (width x0.25)
|
||||
model = OSNet(
|
||||
num_classes,
|
||||
blocks=[OSBlock, OSBlock, OSBlock],
|
||||
layers=[2, 2, 2],
|
||||
channels=[16, 64, 96, 128],
|
||||
loss=loss,
|
||||
**kwargs
|
||||
)
|
||||
if pretrained:
|
||||
init_pretrained_weights(model, key='osnet_x0_25')
|
||||
return model
|
||||
|
||||
|
||||
def osnet_ibn_x1_0(
|
||||
num_classes=1000, pretrained=True, loss='softmax', **kwargs
|
||||
):
|
||||
# standard size (width x1.0) + IBN layer
|
||||
# Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
|
||||
model = OSNet(
|
||||
num_classes,
|
||||
blocks=[OSBlock, OSBlock, OSBlock],
|
||||
layers=[2, 2, 2],
|
||||
channels=[64, 256, 384, 512],
|
||||
loss=loss,
|
||||
IN=True,
|
||||
**kwargs
|
||||
)
|
||||
if pretrained:
|
||||
init_pretrained_weights(model, key='osnet_ibn_x1_0')
|
||||
return model
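
# Illustrative usage sketch: build OSNet as a Re-ID feature extractor.
# pretrained=False skips the Google Drive download; the 256x128 input size is
# an assumption (the conventional person-crop resolution), not fixed by OSNet.
def _demo_osnet_features():
    model = osnet_x0_25(num_classes=1, pretrained=False)
    model.eval()
    with torch.no_grad():
        feats = model(torch.randn(4, 3, 256, 128))  # (4, 512) embeddings in eval mode
    return feats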
|
||||
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
file for reid_models folder
|
||||
"""
|
||||
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
file for DeepSORT Re-ID model
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
import cv2
|
||||
import logging
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
def __init__(self, c_in, c_out, is_downsample=False):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.is_downsample = is_downsample
|
||||
if is_downsample:
|
||||
self.conv1 = nn.Conv2d(
|
||||
c_in, c_out, 3, stride=2, padding=1, bias=False)
|
||||
else:
|
||||
self.conv1 = nn.Conv2d(
|
||||
c_in, c_out, 3, stride=1, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(c_out)
|
||||
self.relu = nn.ReLU(True)
|
||||
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
|
||||
padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(c_out)
|
||||
if is_downsample:
|
||||
self.downsample = nn.Sequential(
|
||||
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
|
||||
nn.BatchNorm2d(c_out)
|
||||
)
|
||||
elif c_in != c_out:
|
||||
self.downsample = nn.Sequential(
|
||||
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
|
||||
nn.BatchNorm2d(c_out)
|
||||
)
|
||||
self.is_downsample = True
|
||||
|
||||
def forward(self, x):
|
||||
y = self.conv1(x)
|
||||
y = self.bn1(y)
|
||||
y = self.relu(y)
|
||||
y = self.conv2(y)
|
||||
y = self.bn2(y)
|
||||
if self.is_downsample:
|
||||
x = self.downsample(x)
|
||||
return F.relu(x.add(y), True)
|
||||
|
||||
|
||||
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
|
||||
blocks = []
|
||||
for i in range(repeat_times):
|
||||
if i == 0:
|
||||
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
|
||||
else:
|
||||
blocks += [BasicBlock(c_out, c_out), ]
|
||||
return nn.Sequential(*blocks)
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self, num_classes=751, reid=False):
|
||||
super(Net, self).__init__()
|
||||
# 3 128 64
|
||||
self.conv = nn.Sequential(
|
||||
nn.Conv2d(3, 64, 3, stride=1, padding=1),
|
||||
nn.BatchNorm2d(64),
|
||||
nn.ReLU(inplace=True),
|
||||
# nn.Conv2d(32,32,3,stride=1,padding=1),
|
||||
# nn.BatchNorm2d(32),
|
||||
# nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(3, 2, padding=1),
|
||||
)
|
||||
# 32 64 32
|
||||
self.layer1 = make_layers(64, 64, 2, False)
|
||||
# 32 64 32
|
||||
self.layer2 = make_layers(64, 128, 2, True)
|
||||
# 64 32 16
|
||||
self.layer3 = make_layers(128, 256, 2, True)
|
||||
# 128 16 8
|
||||
self.layer4 = make_layers(256, 512, 2, True)
|
||||
# 256 8 4
|
||||
self.avgpool = nn.AvgPool2d((8, 4), 1)
|
||||
# 256 1 1
|
||||
self.reid = reid
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Linear(512, 256),
|
||||
nn.BatchNorm1d(256),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout(),
|
||||
nn.Linear(256, num_classes),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
x = self.avgpool(x)
|
||||
x = x.view(x.size(0), -1)
|
||||
# B x 512
|
||||
if self.reid:
|
||||
x = x.div(x.norm(p=2, dim=1, keepdim=True))
|
||||
return x
|
||||
# classifier
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
|
||||
class Extractor(object):
|
||||
def __init__(self, model_path, use_cuda=True):
|
||||
self.net = Net(reid=True)
|
||||
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
|
||||
state_dict = torch.load(model_path, map_location=torch.device(self.device))[
|
||||
'net_dict']
|
||||
self.net.load_state_dict(state_dict)
|
||||
logger = logging.getLogger("root.tracker")
|
||||
logger.info("Loading weights from {}... Done!".format(model_path))
|
||||
self.net.to(self.device)
|
||||
self.size = (64, 128)
|
||||
self.norm = transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
|
||||
])
|
||||
|
||||
    def _preprocess(self, im_crops):
        """
        1. convert to float with scale from 0 to 1
        2. resize to (64, 128) as the Market1501 dataset did
        3. concatenate to a numpy array
        4. convert to torch Tensor
        5. normalize
        """
        def _resize(im, size):
            try:
                return cv2.resize(im.astype(np.float32) / 255., size)
            except Exception:
                print('Error: bbox has a zero-sized side, ', im.shape)
                exit(0)

        im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
            0) for im in im_crops], dim=0).float()
        return im_batch
|
||||
|
||||
def __call__(self, im_crops):
|
||||
if isinstance(im_crops, list):
|
||||
im_batch = self._preprocess(im_crops)
|
||||
else:
|
||||
im_batch = im_crops
|
||||
|
||||
with torch.no_grad():
|
||||
im_batch = im_batch.to(self.device)
|
||||
features = self.net(im_batch)
|
||||
return features
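
# Illustrative usage sketch (the checkpoint path is hypothetical):
#   extractor = Extractor('weights/ckpt.t7', use_cuda=False)
#   crops = [np.zeros((128, 64, 3), dtype=np.uint8)]  # HxWx3 person crops
#   feats = extractor(crops)                          # (1, 512), L2-normalized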
|
||||
@@ -0,0 +1,273 @@
|
||||
"""
|
||||
load checkpoint file
|
||||
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
import pickle
|
||||
import shutil
|
||||
import os.path as osp
|
||||
import warnings
|
||||
from functools import partial
|
||||
from collections import OrderedDict
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
__all__ = [
|
||||
'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
|
||||
'open_all_layers', 'open_specified_layers', 'count_num_param',
|
||||
'load_pretrained_weights'
|
||||
]
|
||||
|
||||
def load_checkpoint(fpath):
|
||||
r"""Loads checkpoint.
|
||||
|
||||
``UnicodeDecodeError`` is handled gracefully, which means
files saved with Python 2 can be read from Python 3.
|
||||
|
||||
Args:
|
||||
fpath (str): path to checkpoint.
|
||||
|
||||
Returns:
|
||||
dict
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import load_checkpoint
|
||||
>>> fpath = 'log/my_model/model.pth.tar-10'
|
||||
>>> checkpoint = load_checkpoint(fpath)
|
||||
"""
|
||||
if fpath is None:
|
||||
raise ValueError('File path is None')
|
||||
fpath = osp.abspath(osp.expanduser(fpath))
|
||||
if not osp.exists(fpath):
|
||||
raise FileNotFoundError('File is not found at "{}"'.format(fpath))
|
||||
map_location = None if torch.cuda.is_available() else 'cpu'
|
||||
try:
|
||||
checkpoint = torch.load(fpath, map_location=map_location)
|
||||
except UnicodeDecodeError:
|
||||
pickle.load = partial(pickle.load, encoding="latin1")
|
||||
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
|
||||
checkpoint = torch.load(
|
||||
fpath, pickle_module=pickle, map_location=map_location
|
||||
)
|
||||
except Exception:
|
||||
print('Unable to load checkpoint from "{}"'.format(fpath))
|
||||
raise
|
||||
return checkpoint
|
||||
|
||||
|
||||
def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
|
||||
r"""Resumes training from a checkpoint.
|
||||
|
||||
This will load (1) model weights and (2) ``state_dict``
|
||||
of optimizer if ``optimizer`` is not None.
|
||||
|
||||
Args:
|
||||
fpath (str): path to checkpoint.
|
||||
model (nn.Module): model.
|
||||
optimizer (Optimizer, optional): an Optimizer.
|
||||
scheduler (LRScheduler, optional): an LRScheduler.
|
||||
|
||||
Returns:
|
||||
int: start_epoch.
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import resume_from_checkpoint
|
||||
>>> fpath = 'log/my_model/model.pth.tar-10'
|
||||
>>> start_epoch = resume_from_checkpoint(
|
||||
>>> fpath, model, optimizer, scheduler
|
||||
>>> )
|
||||
"""
|
||||
print('Loading checkpoint from "{}"'.format(fpath))
|
||||
checkpoint = load_checkpoint(fpath)
|
||||
model.load_state_dict(checkpoint['state_dict'])
|
||||
print('Loaded model weights')
|
||||
if optimizer is not None and 'optimizer' in checkpoint.keys():
|
||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
print('Loaded optimizer')
|
||||
if scheduler is not None and 'scheduler' in checkpoint.keys():
|
||||
scheduler.load_state_dict(checkpoint['scheduler'])
|
||||
print('Loaded scheduler')
|
||||
start_epoch = checkpoint['epoch']
|
||||
print('Last epoch = {}'.format(start_epoch))
|
||||
if 'rank1' in checkpoint.keys():
|
||||
print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
|
||||
return start_epoch
|
||||
|
||||
|
||||
def adjust_learning_rate(
|
||||
optimizer,
|
||||
base_lr,
|
||||
epoch,
|
||||
stepsize=20,
|
||||
gamma=0.1,
|
||||
linear_decay=False,
|
||||
final_lr=0,
|
||||
max_epoch=100
|
||||
):
|
||||
r"""Adjusts learning rate.
|
||||
|
||||
Deprecated.
|
||||
"""
|
||||
if linear_decay:
|
||||
# linearly decay learning rate from base_lr to final_lr
|
||||
frac_done = epoch / max_epoch
|
||||
lr = frac_done*final_lr + (1.-frac_done) * base_lr
|
||||
else:
|
||||
# decay learning rate by gamma for every stepsize
|
||||
lr = base_lr * (gamma**(epoch // stepsize))
|
||||
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr
|
||||
|
||||
|
||||
def set_bn_to_eval(m):
|
||||
r"""Sets BatchNorm layers to eval mode."""
|
||||
# 1. no update for running mean and var
|
||||
# 2. scale and shift parameters are still trainable
|
||||
classname = m.__class__.__name__
|
||||
if classname.find('BatchNorm') != -1:
|
||||
m.eval()
|
||||
|
||||
|
||||
def open_all_layers(model):
|
||||
r"""Opens all layers in model for training.
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import open_all_layers
|
||||
>>> open_all_layers(model)
|
||||
"""
|
||||
model.train()
|
||||
for p in model.parameters():
|
||||
p.requires_grad = True
|
||||
|
||||
|
||||
def open_specified_layers(model, open_layers):
|
||||
r"""Opens specified layers in model for training while keeping
|
||||
other layers frozen.
|
||||
|
||||
Args:
|
||||
model (nn.Module): neural net model.
|
||||
open_layers (str or list): layers open for training.
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import open_specified_layers
|
||||
>>> # Only model.classifier will be updated.
|
||||
>>> open_layers = 'classifier'
|
||||
>>> open_specified_layers(model, open_layers)
|
||||
>>> # Only model.fc and model.classifier will be updated.
|
||||
>>> open_layers = ['fc', 'classifier']
|
||||
>>> open_specified_layers(model, open_layers)
|
||||
"""
|
||||
if isinstance(model, nn.DataParallel):
|
||||
model = model.module
|
||||
|
||||
if isinstance(open_layers, str):
|
||||
open_layers = [open_layers]
|
||||
|
||||
for layer in open_layers:
|
||||
assert hasattr(
|
||||
model, layer
|
||||
), '"{}" is not an attribute of the model, please provide the correct name'.format(
|
||||
layer
|
||||
)
|
||||
|
||||
for name, module in model.named_children():
|
||||
if name in open_layers:
|
||||
module.train()
|
||||
for p in module.parameters():
|
||||
p.requires_grad = True
|
||||
else:
|
||||
module.eval()
|
||||
for p in module.parameters():
|
||||
p.requires_grad = False
|
||||
|
||||
|
||||
def count_num_param(model):
|
||||
r"""Counts number of parameters in a model while ignoring ``self.classifier``.
|
||||
|
||||
Args:
|
||||
model (nn.Module): network model.
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import count_num_param
|
||||
>>> model_size = count_num_param(model)
|
||||
|
||||
.. warning::
|
||||
|
||||
This method is deprecated in favor of
|
||||
``torchreid.utils.compute_model_complexity``.
|
||||
"""
|
||||
warnings.warn(
|
||||
'This method is deprecated and will be removed in the future.'
|
||||
)
|
||||
|
||||
num_param = sum(p.numel() for p in model.parameters())
|
||||
|
||||
if isinstance(model, nn.DataParallel):
|
||||
model = model.module
|
||||
|
||||
if hasattr(model,
|
||||
'classifier') and isinstance(model.classifier, nn.Module):
|
||||
# we ignore the classifier because it is unused at test time
|
||||
num_param -= sum(p.numel() for p in model.classifier.parameters())
|
||||
|
||||
return num_param
|
||||
|
||||
|
||||
def load_pretrained_weights(model, weight_path):
|
||||
r"""Loads pretrianed weights to model.
|
||||
|
||||
Features::
|
||||
- Incompatible layers (unmatched in name or size) will be ignored.
|
||||
- Can automatically deal with keys containing "module.".
|
||||
|
||||
Args:
|
||||
model (nn.Module): network model.
|
||||
weight_path (str): path to pretrained weights.
|
||||
|
||||
Examples::
|
||||
>>> from torchreid.utils import load_pretrained_weights
|
||||
>>> weight_path = 'log/my_model/model-best.pth.tar'
|
||||
>>> load_pretrained_weights(model, weight_path)
|
||||
"""
|
||||
checkpoint = load_checkpoint(weight_path)
|
||||
if 'state_dict' in checkpoint:
|
||||
state_dict = checkpoint['state_dict']
|
||||
else:
|
||||
state_dict = checkpoint
|
||||
|
||||
model_dict = model.state_dict()
|
||||
new_state_dict = OrderedDict()
|
||||
matched_layers, discarded_layers = [], []
|
||||
|
||||
for k, v in state_dict.items():
|
||||
if k.startswith('module.'):
|
||||
k = k[7:] # discard module.
|
||||
|
||||
if k in model_dict and model_dict[k].size() == v.size():
|
||||
new_state_dict[k] = v
|
||||
matched_layers.append(k)
|
||||
else:
|
||||
discarded_layers.append(k)
|
||||
|
||||
model_dict.update(new_state_dict)
|
||||
model.load_state_dict(model_dict)
|
||||
|
||||
if len(matched_layers) == 0:
|
||||
warnings.warn(
|
||||
'The pretrained weights "{}" cannot be loaded, '
|
||||
'please check the key names manually '
|
||||
'(** ignored and continue **)'.format(weight_path)
|
||||
)
|
||||
else:
|
||||
print(
|
||||
'Successfully loaded pretrained weights from "{}"'.
|
||||
format(weight_path)
|
||||
)
|
||||
if len(discarded_layers) > 0:
|
||||
print(
|
||||
'** The following layers are discarded '
|
||||
'due to unmatched keys or layer size: {}'.
|
||||
format(discarded_layers)
|
||||
)
|
||||
169
test/yolov7-tracker/tracker/trackers/sort_tracker.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Sort
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
from .basetrack import BaseTrack, TrackState
|
||||
from .tracklet import Tracklet
|
||||
from .matching import *
|
||||
|
||||
class SortTracker(object):
|
||||
def __init__(self, args, frame_rate=30):
|
||||
self.tracked_tracklets = [] # type: list[Tracklet]
|
||||
self.lost_tracklets = [] # type: list[Tracklet]
|
||||
self.removed_tracklets = [] # type: list[Tracklet]
|
||||
|
||||
self.frame_id = 0
|
||||
self.args = args
|
||||
|
||||
self.det_thresh = args.conf_thresh + 0.1
|
||||
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
|
||||
self.max_time_lost = self.buffer_size
|
||||
|
||||
self.motion = args.kalman_format
|
||||
|
||||
def update(self, output_results, img, ori_img):
|
||||
"""
|
||||
output_results: processed detections (scale to original size) tlbr format
|
||||
"""
|
||||
|
||||
self.frame_id += 1
|
||||
activated_tracklets = []
|
||||
refind_tracklets = []
|
||||
lost_tracklets = []
|
||||
removed_tracklets = []
|
||||
|
||||
scores = output_results[:, 4]
|
||||
bboxes = output_results[:, :4]
|
||||
categories = output_results[:, -1]
|
||||
|
||||
remain_inds = scores > self.args.conf_thresh
|
||||
|
||||
dets = bboxes[remain_inds]
|
||||
|
||||
cates = categories[remain_inds]
|
||||
|
||||
scores_keep = scores[remain_inds]
|
||||
|
||||
if len(dets) > 0:
|
||||
'''Detections'''
|
||||
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
|
||||
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
|
||||
else:
|
||||
detections = []
|
||||
|
||||
''' Add newly detected tracklets to tracked_tracklets'''
|
||||
unconfirmed = []
|
||||
tracked_tracklets = [] # type: list[Tracklet]
|
||||
for track in self.tracked_tracklets:
|
||||
if not track.is_activated:
|
||||
unconfirmed.append(track)
|
||||
else:
|
||||
tracked_tracklets.append(track)
|
||||
|
||||
''' Step 2: First association, with high score detection boxes'''
|
||||
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
|
||||
|
||||
# Predict the current location with Kalman
|
||||
for tracklet in tracklet_pool:
|
||||
tracklet.predict()
|
||||
|
||||
dists = iou_distance(tracklet_pool, detections)
|
||||
|
||||
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
|
||||
|
||||
for itracked, idet in matches:
|
||||
track = tracklet_pool[itracked]
|
||||
det = detections[idet]
|
||||
if track.state == TrackState.Tracked:
|
||||
track.update(detections[idet], self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
else:
|
||||
track.re_activate(det, self.frame_id, new_id=False)
|
||||
refind_tracklets.append(track)
|
||||
|
||||
|
||||
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
|
||||
detections = [detections[i] for i in u_detection]
|
||||
dists = iou_distance(unconfirmed, detections)
|
||||
|
||||
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
|
||||
|
||||
for itracked, idet in matches:
|
||||
unconfirmed[itracked].update(detections[idet], self.frame_id)
|
||||
activated_tracklets.append(unconfirmed[itracked])
|
||||
for it in u_unconfirmed:
|
||||
track = unconfirmed[it]
|
||||
track.mark_removed()
|
||||
removed_tracklets.append(track)
|
||||
|
||||
""" Step 3: Init new tracklets"""
|
||||
for inew in u_detection:
|
||||
track = detections[inew]
|
||||
if track.score < self.det_thresh:
|
||||
continue
|
||||
track.activate(self.frame_id)
|
||||
activated_tracklets.append(track)
|
||||
|
||||
""" Step 4: Update state"""
|
||||
for track in self.lost_tracklets:
|
||||
if self.frame_id - track.end_frame > self.max_time_lost:
|
||||
track.mark_removed()
|
||||
removed_tracklets.append(track)
|
||||
|
||||
# print('Remained match {} s'.format(t4-t3))
|
||||
|
||||
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
|
||||
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
|
||||
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
|
||||
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
|
||||
self.lost_tracklets.extend(lost_tracklets)
|
||||
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
|
||||
self.removed_tracklets.extend(removed_tracklets)
|
||||
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
|
||||
# get scores of lost tracks
|
||||
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
|
||||
|
||||
return output_tracklets
|
||||
|
||||
|
||||
def joint_tracklets(tlista, tlistb):
|
||||
exists = {}
|
||||
res = []
|
||||
for t in tlista:
|
||||
exists[t.track_id] = 1
|
||||
res.append(t)
|
||||
for t in tlistb:
|
||||
tid = t.track_id
|
||||
if not exists.get(tid, 0):
|
||||
exists[tid] = 1
|
||||
res.append(t)
|
||||
return res
|
||||
|
||||
|
||||
def sub_tracklets(tlista, tlistb):
|
||||
tracklets = {}
|
||||
for t in tlista:
|
||||
tracklets[t.track_id] = t
|
||||
for t in tlistb:
|
||||
tid = t.track_id
|
||||
if tracklets.get(tid, 0):
|
||||
del tracklets[tid]
|
||||
return list(tracklets.values())
|
||||
|
||||
|
||||
def remove_duplicate_tracklets(trackletsa, trackletsb):
|
||||
pdist = iou_distance(trackletsa, trackletsb)
|
||||
pairs = np.where(pdist < 0.15)
|
||||
dupa, dupb = list(), list()
|
||||
for p, q in zip(*pairs):
|
||||
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
|
||||
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
|
||||
if timep > timeq:
|
||||
dupb.append(q)
|
||||
else:
|
||||
dupa.append(p)
|
||||
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
|
||||
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
|
||||
return resa, resb
|
||||
338
test/yolov7-tracker/tracker/trackers/sparse_tracker.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
SparseTrack
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torchvision.ops import nms
|
||||
|
||||
import cv2
|
||||
import torchvision.transforms as T
|
||||
|
||||
from .basetrack import BaseTrack, TrackState
|
||||
from .tracklet import Tracklet, Tracklet_w_depth
|
||||
from .matching import *
|
||||
|
||||
from .reid_models.OSNet import *
|
||||
from .reid_models.load_model_tools import load_pretrained_weights
|
||||
from .reid_models.deepsort_reid import Extractor
|
||||
|
||||
from .camera_motion_compensation import GMC
|
||||
|
||||
REID_MODEL_DICT = {
|
||||
'osnet_x1_0': osnet_x1_0,
|
||||
'osnet_x0_75': osnet_x0_75,
|
||||
'osnet_x0_5': osnet_x0_5,
|
||||
'osnet_x0_25': osnet_x0_25,
|
||||
'deepsort': Extractor
|
||||
}
|
||||
|
||||
|
||||
def load_reid_model(reid_model, reid_model_path):
|
||||
|
||||
if 'osnet' in reid_model:
|
||||
func = REID_MODEL_DICT[reid_model]
|
||||
model = func(num_classes=1, pretrained=False, )
|
||||
load_pretrained_weights(model, reid_model_path)
|
||||
model.cuda().eval()
|
||||
|
||||
elif 'deepsort' in reid_model:
|
||||
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return model
|
||||
|
||||
class SparseTracker(object):
|
||||
def __init__(self, args, frame_rate=30):
|
||||
self.tracked_tracklets = [] # type: list[Tracklet]
|
||||
self.lost_tracklets = [] # type: list[Tracklet]
|
||||
self.removed_tracklets = [] # type: list[Tracklet]
|
||||
|
||||
self.frame_id = 0
|
||||
self.args = args
|
||||
|
||||
self.det_thresh = args.conf_thresh + 0.1
|
||||
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
|
||||
self.max_time_lost = self.buffer_size
|
||||
|
||||
self.motion = args.kalman_format
|
||||
|
||||
# camera motion compensation module
|
||||
self.gmc = GMC(method='orb', downscale=2, verbose=None)
|
||||
|
||||
def get_deep_range(self, obj, step):
|
||||
col = []
|
||||
for t in obj:
|
||||
lend = (t.deep_vec)[2]
|
||||
col.append(lend)
|
||||
max_len, mix_len = max(col), min(col)
|
||||
if max_len != mix_len:
|
||||
deep_range =np.arange(mix_len, max_len, (max_len - mix_len + 1) / step)
|
||||
if deep_range[-1] < max_len:
|
||||
deep_range = np.concatenate([deep_range, np.array([max_len],)])
|
||||
deep_range[0] = np.floor(deep_range[0])
|
||||
deep_range[-1] = np.ceil(deep_range[-1])
|
||||
else:
|
||||
deep_range = [mix_len,]
|
||||
mask = self.get_sub_mask(deep_range, col)
|
||||
return mask
|
||||
|
||||
def get_sub_mask(self, deep_range, col):
|
||||
mix_len=deep_range[0]
|
||||
max_len=deep_range[-1]
|
||||
if max_len == mix_len:
|
||||
lc = mix_len
|
||||
mask = []
|
||||
for d in deep_range:
|
||||
if d > deep_range[0] and d < deep_range[-1]:
|
||||
mask.append((col >= lc) & (col < d))
|
||||
lc = d
|
||||
elif d == deep_range[-1]:
|
||||
mask.append((col >= lc) & (col <= d))
|
||||
lc = d
|
||||
else:
|
||||
lc = d
|
||||
continue
|
||||
return mask
|
||||
|
||||
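    # Worked example (hypothetical numbers): with pseudo-depths col = [3, 10, 18]
    # and step = 3, get_deep_range yields bin edges close to [3, 8.3, 13.7, 18],
    # and get_sub_mask returns three boolean masks that assign every object to
    # exactly one depth level before matching.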
    # core function
    def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
        if len(detections) > 0:
            det_mask = self.get_deep_range(detections, levels)
        else:
            det_mask = []

        if len(tracks) != 0:
            track_mask = self.get_deep_range(tracks, levels)
        else:
            track_mask = []

        u_detection, u_tracks, res_det, res_track = [], [], [], []
        if len(track_mask) != 0:
            # if one side has more depth levels than the other, the extra levels
            # cannot be matched and go straight to the residual lists
            if len(track_mask) < len(det_mask):
                for i in range(len(det_mask) - len(track_mask)):
                    idx = np.argwhere(det_mask[len(track_mask) + i])
                    for idd in idx:
                        res_det.append(detections[idd[0]])
            elif len(track_mask) > len(det_mask):
                for i in range(len(track_mask) - len(det_mask)):
                    idx = np.argwhere(track_mask[len(det_mask) + i])
                    for idd in idx:
                        res_track.append(tracks[idd[0]])

            for dm, tm in zip(det_mask, track_mask):
                det_idx = np.argwhere(dm)
                trk_idx = np.argwhere(tm)

                # search det
                det_ = []
                for idd in det_idx:
                    det_.append(detections[idd[0]])
                det_ = det_ + u_detection
                # search trk
                track_ = []
                for idt in trk_idx:
                    track_.append(tracks[idt[0]])
                # update trk
                track_ = track_ + u_tracks

                dists = iou_distance(track_, det_)

                matches, u_track_, u_det_ = linear_assignment(dists, thresh)
                for itracked, idet in matches:
                    track = track_[itracked]
                    det = det_[idet]
                    if track.state == TrackState.Tracked:
                        track.update(det_[idet], self.frame_id)
                        activated_tracklets.append(track)
                    else:
                        track.re_activate(det, self.frame_id, new_id=False)
                        refind_tracklets.append(track)
                u_tracks = [track_[t] for t in u_track_]
                u_detection = [det_[t] for t in u_det_]

            u_tracks = u_tracks + res_track
            u_detection = u_detection + res_det

        else:
            u_detection = detections

        return activated_tracklets, refind_tracklets, u_tracks, u_detection
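    # DCM sketch: depth levels are matched one by one, and whatever stays
    # unmatched at one level is appended to the next level's candidate pool
    # (det_ = det_ + u_detection, track_ = track_ + u_tracks), so the assignment
    # cascades through pseudo-depth instead of being solved globally in one shot.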
    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes, depth cascade matching'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        # depth cascade matching
        activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
            detections,
            tracklet_pool,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.75,
            is_fuse=True)

        ''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]

        activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
            detections_second,
            r_tracked_tracklets,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.3,
            is_fuse=False)

        for track in u_track:
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = u_detection_high
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # output only the activated tracklets
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    """Merge two tracklet lists, keeping each track_id only once."""
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    """Return the tracklets of tlista whose track_id does not appear in tlistb."""
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    """Resolve tracked/lost duplicates: for overlapping pairs, keep the longer-lived tracklet."""
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
327
test/yolov7-tracker/tracker/trackers/strongsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
Strong sort
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class StrongSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, 128, 256)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)

        self.lambda_ = 0.98  # the coefficient of the cost mix in Eq. 10 of the paper

    def reid_preprocess(self, obj_bbox):
        """
        preprocess a cropped object bbox

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 256)
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance features of the objects
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()
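    # Feature-path sketch: every detection box is cropped from ori_img, resized
    # to self.bbox_crop_size, normalized with ImageNet statistics, batched and
    # run through the ReID backbone once per frame; the resulting
    # (num_objects, feature_dim) array is consumed by embedding_distance() in
    # gated_metric() below.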
    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # vanilla matching
        cost_matrix = self.gated_metric(tracklet_pool, detections)
        matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with iou'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # output only the activated tracklets
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by the Kalman state distance, constrained by the
        0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large constant that marks associations as infeasible
        only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements, )
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost

            cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
        return cost_matrix
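    # Cost-mix sketch (hypothetical numbers): with lambda_ = 0.98, a pair with
    # appearance cost 0.10 and gating distance 4.0 gets
    # 0.98 * 0.10 + 0.02 * 4.0 = 0.178, while any pair whose appearance cost
    # exceeds 0.15 or whose gating distance exceeds chi2inv95[4] is set to 1e5
    # and is effectively never matched.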

def joint_tracklets(tlista, tlistb):
    """Merge two tracklet lists, keeping each track_id only once."""
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    """Return the tracklets of tlista whose track_id does not appear in tlistb."""
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    """Resolve tracked/lost duplicates: for overlapping pairs, keep the longer-lived tracklet."""
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
366
test/yolov7-tracker/tracker/trackers/tracklet.py
Normal file
@@ -0,0 +1,366 @@
"""
implements base elements of trajectory
"""

import numpy as np
from collections import deque

from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman

MOTION_MODEL_DICT = {
    'sort': SORTKalman,
    'byte': ByteKalman,
    'bot': BotKalman,
    'ocsort': OCSORTKalman,
    'strongsort': NSAKalman,
}

STATE_CONVERT_DICT = {
    'sort': 'xysa',
    'byte': 'xyah',
    'bot': 'xywh',
    'ocsort': 'xysa',
    'strongsort': 'xyah'
}


class Tracklet(BaseTrack):
    def __init__(self, tlwh, score, category, motion='byte'):

        # initial position
        self._tlwh = np.asarray(tlwh, dtype=np.float64)
        self.is_activated = False

        self.score = score
        self.category = category

        # kalman
        self.motion = motion
        self.kalman_filter = MOTION_MODEL_DICT[motion]()

        self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])

        # init kalman
        self.kalman_filter.initialize(self.convert_func(self._tlwh))

    def predict(self):
        self.kalman_filter.predict()
        self.time_since_update += 1

    def activate(self, frame_id):
        self.track_id = self.next_id()

        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        self.time_since_update = 0

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()

    def xyah_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def xywh_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    def xysa_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] = np.sqrt(x[2] * x[3])
        ret[3] = x[2] / ret[2]

        ret[:2] -= ret[2:] / 2
        return ret

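# Conversion sketch: each motion model keeps its own Kalman state layout
# (xyah: center x/y, aspect ratio, height; xywh: center x/y, width, height;
# xysa: center x/y, scale = w*h, aspect = w/h), and the xy*_to_tlwh helpers in
# Tracklet map the first four state entries back to (top-left x, top-left y,
# w, h). E.g. an xysa state (50, 40, 400, 1.0) gives w = h = 20 and
# tlwh = (40, 30, 20, 20).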
class Tracklet_w_reid(Tracklet):
    """
    Tracklet class with reid features, for botsort, deepsort, etc.
    """

    def __init__(self, tlwh, score, category, motion='byte',
                 feat=None, feat_history=50):
        super().__init__(tlwh, score, category, motion)

        self.smooth_feat = None  # EMA feature
        self.curr_feat = None  # current feature
        self.features = deque([], maxlen=feat_history)  # all features
        if feat is not None:
            self.update_features(feat)

        self.alpha = 0.9

    def update_features(self, feat):
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)
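    # EMA sketch: with alpha = 0.9 the smoothed embedding follows
    # smooth_feat <- 0.9 * smooth_feat + 0.1 * feat (re-normalized each step),
    # so one occluded or blurry frame can only nudge the track's appearance
    # model, while the per-frame curr_feat is kept separately.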
    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
        else:
            self.kalman_filter.update(self.convert_func(new_track.tlwh))

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
        else:
            self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.time_since_update = 0


class Tracklet_w_velocity(Tracklet):
    """
    Tracklet class with a velocity direction, for ocsort.
    """

    def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
        super().__init__(tlwh, score, category, motion)

        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder
        self.observations = dict()
        self.history_observations = []
        self.velocity = None
        self.delta_t = delta_t

        self.age = 0  # mark the age

    @staticmethod
    def speed_direction(bbox1, bbox2):
        cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
        cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
        speed = np.array([cy2 - cy1, cx2 - cx1])
        norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
        return speed / norm
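    # Direction sketch (hypothetical boxes): speed_direction returns a unit
    # vector in (dy, dx) order; for centers moving from (0, 0) to (3, 4) it
    # yields roughly (0.8, 0.6), keeping only the direction of motion, not the
    # magnitude.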
    def predict(self):
        self.kalman_filter.predict()

        self.age += 1
        self.time_since_update += 1

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.time_since_update = 0

        # update velocity and history buffer
        new_tlbr = Tracklet_w_bbox_buffer.tlwh_to_tlbr(new_tlwh)

        if self.last_observation.sum() >= 0:  # a previous observation exists
            previous_box = None
            for i in range(self.delta_t):
                dt = self.delta_t - i
                if self.age - dt in self.observations:
                    previous_box = self.observations[self.age - dt]
                    break
            if previous_box is None:
                previous_box = self.last_observation
            """
            Estimate the track speed direction with observations \Delta t steps away
            """
            self.velocity = self.speed_direction(previous_box, new_tlbr)

        new_observation = np.r_[new_tlbr, new_track.score]
        self.last_observation = new_observation
        self.observations[self.age] = new_observation
        self.history_observations.append(new_observation)


class Tracklet_w_bbox_buffer(Tracklet):
    """
    Tracklet class with a buffer of bboxes, for C_BIoU track.
    """
    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

        # params of the motion state
        self.b1, self.b2, self.n = 0.3, 0.5, 5
        self.origin_bbox_buffer = deque()  # a deque that stores the original bboxes (tlwh) from t - self.n to t, where t is the last time detected
        self.origin_bbox_buffer.append(self._tlwh)
        # buffered bbox, two buffer scales
        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        # motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def get_buffer_bbox(self, level=1, bbox=None):
        """
        get the buffered bbox as: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
        level = 1: b = self.b1, level = 2: b = self.b2
        bbox: if not None, use bbox to calculate the buffer_bbox, else use self._tlwh
        """
        assert level in [1, 2], 'level must be 1 or 2'

        b = self.b1 if level == 1 else self.b2

        if bbox is None:
            buffer_bbox = self._tlwh + np.array([-b*self._tlwh[2], -b*self._tlwh[3], 2*b*self._tlwh[2], 2*b*self._tlwh[3]])
        else:
            buffer_bbox = bbox + np.array([-b*bbox[2], -b*bbox[3], 2*b*bbox[2], 2*b*bbox[3]])
        return np.maximum(0.0, buffer_bbox)
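    # Buffer sketch (hypothetical box): with b1 = 0.3 a detection
    # (100, 100, 40, 20) expands to (88, 94, 64, 32), i.e. the box grows by
    # 30% of its width/height on every side, which is what makes the buffered
    # IoU of C-BIoU tolerant to large inter-frame motion.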
    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

        self._tlwh = new_track._tlwh
        # update the stored bboxes
        if (len(self.origin_bbox_buffer) > self.n):
            self.origin_bbox_buffer.popleft()
            self.origin_bbox_buffer.append(self._tlwh)
        else:
            self.origin_bbox_buffer.append(self._tlwh)

        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        # update the stored bboxes
        if (len(self.origin_bbox_buffer) > self.n):
            self.origin_bbox_buffer.popleft()
            self.origin_bbox_buffer.append(new_tlwh)
        else:
            self.origin_bbox_buffer.append(new_tlwh)

        # update the motion state; note this is checked before time_since_update
        # is reset, so that frames missed since the last match are extrapolated
        if self.time_since_update:  # there were some unmatched frames
            if len(self.origin_bbox_buffer) < self.n:
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
            else:  # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
                motion_state = self.origin_bbox_buffer[-1] + \
                    (self.time_since_update / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)

        else:  # no unmatched frames, use the current detection as the motion state
            self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
            self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)

        self.time_since_update = 0


class Tracklet_w_depth(Tracklet):
    """
    tracklet with pseudo-depth info (i.e., 2000 - y2), for SparseTrack
    """

    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

    @property
    # @jit(nopython=True)
    def deep_vec(self):
        """Convert the bounding box to the depth vector `(center x, bottom y, pseudo-depth)`,
        where pseudo-depth is 2000 - bottom y, so boxes lower in the image count as closer.
        """
        ret = self.tlwh.copy()
        cx = ret[0] + 0.5 * ret[2]
        y2 = ret[1] + ret[3]
        length = 2000 - y2
        return np.asarray([cx, y2, length], dtype=np.float64)
5
test/yolov7-tracker/tracker/trackeval/__init__.py
Normal file
@@ -0,0 +1,5 @@
from .eval import Evaluator
from . import datasets
from . import metrics
from . import plotting
from . import utils
65
test/yolov7-tracker/tracker/trackeval/_timing.py
Normal file
@@ -0,0 +1,65 @@
from functools import wraps
from time import perf_counter
import inspect

DO_TIMING = False
DISPLAY_LESS_PROGRESS = False
timer_dict = {}
counter = 0


def time(f):
    @wraps(f)
    def wrap(*args, **kw):
        if DO_TIMING:
            # Run function with timing
            ts = perf_counter()
            result = f(*args, **kw)
            te = perf_counter()
            tt = te - ts

            # Get function name
            arg_names = inspect.getfullargspec(f)[0]
            if arg_names[0] == 'self' and DISPLAY_LESS_PROGRESS:
                return result
            elif arg_names[0] == 'self':
                method_name = type(args[0]).__name__ + '.' + f.__name__
            else:
                method_name = f.__name__

            # Record accumulative time in each function for analysis
            if method_name in timer_dict.keys():
                timer_dict[method_name] += tt
            else:
                timer_dict[method_name] = tt

            # If code is finished, display timing summary
            if method_name == "Evaluator.evaluate":
                print("")
                print("Timing analysis:")
                for key, value in timer_dict.items():
                    print('%-70s %2.4f sec' % (key, value))
            else:
                # Get function argument values for printing special arguments of interest
                arg_titles = ['tracker', 'seq', 'cls']
                arg_vals = []
                for i, a in enumerate(arg_names):
                    if a in arg_titles:
                        arg_vals.append(args[i])
                arg_text = '(' + ', '.join(arg_vals) + ')'

                # Display methods and functions with different indentation.
                if arg_names[0] == 'self':
                    print('%-74s %2.4f sec' % (' '*4 + method_name + arg_text, tt))
                elif arg_names[0] == 'test':
                    pass
                else:
                    global counter
                    counter += 1
                    print('%i %-70s %2.4f sec' % (counter, method_name + arg_text, tt))

            return result
        else:
            # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
            return f(*args, **kw)
    return wrap
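
# Usage sketch (a caller is assumed to flip DO_TIMING before evaluation):
#
#   from trackeval import _timing
#   _timing.DO_TIMING = True
#
#   @_timing.time
#   def evaluate(self, ...): ...
#
# Each decorated call is then measured with perf_counter and accumulated in
# timer_dict; the summary is printed once "Evaluator.evaluate" itself returns.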
@@ -0,0 +1,6 @@

from . import baseline_utils
from . import stp
from . import non_overlap
from . import pascal_colormap
from . import thresholder
from . import vizualize
@@ -0,0 +1,321 @@

import os
import csv
import numpy as np
from copy import deepcopy
from PIL import Image
from pycocotools import mask as mask_utils
from scipy.optimize import linear_sum_assignment
from trackeval.baselines.pascal_colormap import pascal_colormap


def load_seq(file_to_load):
    """ Load input data from file in RobMOTS format (e.g. provided detections).
    Returns: Data object with the following structure (see STP):
        data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    """
    fp = open(file_to_load)
    dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
    dialect.skipinitialspace = True
    fp.seek(0)
    reader = csv.reader(fp, dialect)
    read_data = {}
    num_timesteps = 0
    for i, row in enumerate(reader):
        if row[-1] == '':
            row = row[:-1]
        t = int(row[0])
        cid = row[1]
        c = int(row[2])
        s = row[3]
        h = row[4]
        w = row[5]
        rle = row[6]

        if t >= num_timesteps:
            num_timesteps = t + 1

        if c in read_data.keys():
            if t in read_data[c].keys():
                read_data[c][t]['ids'].append(cid)
                read_data[c][t]['scores'].append(s)
                read_data[c][t]['im_hs'].append(h)
                read_data[c][t]['im_ws'].append(w)
                read_data[c][t]['mask_rles'].append(rle)
            else:
                read_data[c][t] = {}
                read_data[c][t]['ids'] = [cid]
                read_data[c][t]['scores'] = [s]
                read_data[c][t]['im_hs'] = [h]
                read_data[c][t]['im_ws'] = [w]
                read_data[c][t]['mask_rles'] = [rle]
        else:
            read_data[c] = {t: {}}
            read_data[c][t]['ids'] = [cid]
            read_data[c][t]['scores'] = [s]
            read_data[c][t]['im_hs'] = [h]
            read_data[c][t]['im_ws'] = [w]
            read_data[c][t]['mask_rles'] = [rle]
    fp.close()

    data = {}
    for c in read_data.keys():
        data[c] = [{} for _ in range(num_timesteps)]
        for t in range(num_timesteps):
            if t in read_data[c].keys():
                data[c][t]['ids'] = np.atleast_1d(read_data[c][t]['ids']).astype(int)
                data[c][t]['scores'] = np.atleast_1d(read_data[c][t]['scores']).astype(float)
                data[c][t]['im_hs'] = np.atleast_1d(read_data[c][t]['im_hs']).astype(int)
                data[c][t]['im_ws'] = np.atleast_1d(read_data[c][t]['im_ws']).astype(int)
                data[c][t]['mask_rles'] = np.atleast_1d(read_data[c][t]['mask_rles']).astype(str)
            else:
                data[c][t]['ids'] = np.empty(0).astype(int)
                data[c][t]['scores'] = np.empty(0).astype(float)
                data[c][t]['im_hs'] = np.empty(0).astype(int)
                data[c][t]['im_ws'] = np.empty(0).astype(int)
                data[c][t]['mask_rles'] = np.empty(0).astype(str)
    return data


def threshold(tdata, thresh):
    """ Removes detections below a certain threshold ('thresh') score. """
    new_data = {}
    to_keep = tdata['scores'] > thresh
    for field in ['ids', 'scores', 'im_hs', 'im_ws', 'mask_rles']:
        new_data[field] = tdata[field][to_keep]
    return new_data


def create_coco_mask(mask_rles, im_hs, im_ws):
    """ Converts a mask as rle text (+ height and width) to the encoded version used by pycocotools. """
    coco_masks = [{'size': [h, w], 'counts': m.encode(encoding='UTF-8')}
                  for h, w, m in zip(im_hs, im_ws, mask_rles)]
    return coco_masks


def mask_iou(mask_rles1, mask_rles2, im_hs, im_ws, do_ioa=0):
    """ Calculate mask IoU between two sets of masks.
    Further allows 'intersection over area' instead of IoU (over the area of mask_rles1).
    Allows either passing one boolean for do_ioa for all of mask_rles2, or one for each of mask_rles2.
    It is recommended that mask_rles1 are detections and mask_rles2 are groundtruth.
    """
    coco_masks1 = create_coco_mask(mask_rles1, im_hs, im_ws)
    coco_masks2 = create_coco_mask(mask_rles2, im_hs, im_ws)

    if not hasattr(do_ioa, "__len__"):
        do_ioa = [do_ioa] * len(coco_masks2)
    assert (len(coco_masks2) == len(do_ioa))
    if len(coco_masks1) == 0 or len(coco_masks2) == 0:
        iou = np.zeros((len(coco_masks1), len(coco_masks2)))
    else:
        iou = mask_utils.iou(coco_masks1, coco_masks2, do_ioa)
    return iou
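
# do_ioa sketch: with do_ioa = 1 the denominator is only the area of the mask
# from mask_rles1, so a detection fully inside a much larger crowd-ignore
# region still scores 1.0; with do_ioa = 0 the same pair yields the usual,
# smaller IoU.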
def sort_by_score(t_data):
    """ Sorts data by score """
    sort_index = np.argsort(t_data['scores'])[::-1]
    for k in t_data.keys():
        t_data[k] = t_data[k][sort_index]
    return t_data


def mask_NMS(t_data, nms_threshold=0.5, already_sorted=False):
    """ Remove redundant masks by performing non-maximum suppression (NMS) """

    # Sort by score
    if not already_sorted:
        t_data = sort_by_score(t_data)

    # Calculate the mask IoU between all detections in the timestep.
    mask_ious_all = mask_iou(t_data['mask_rles'], t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])

    # Determine which masks NMS should remove
    # (those overlapping greater than nms_threshold with another mask that has a higher score)
    num_dets = len(t_data['mask_rles'])
    to_remove = [False for _ in range(num_dets)]
    for i in range(num_dets):
        if not to_remove[i]:
            for j in range(i + 1, num_dets):
                if mask_ious_all[i, j] > nms_threshold:
                    to_remove[j] = True

    # Remove detections which should be removed
    to_keep = np.logical_not(to_remove)
    for k in t_data.keys():
        t_data[k] = t_data[k][to_keep]

    return t_data


def non_overlap(t_data, already_sorted=False):
    """ Enforces masks to be non-overlapping in an image. It does this by putting masks 'on top of one another',
    such that higher scoring masks 'occlude' and thus remove parts of lower scoring masks.

    Help wanted: if anyone knows a way to do this WITHOUT converting the RLE to a np.array let me know, because that
    would be MUCH more efficient. (I have tried, but haven't yet had success.)
    """

    # Sort by score
    if not already_sorted:
        t_data = sort_by_score(t_data)

    # Get coco masks
    coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])

    # Create a single np.array to hold all of the non-overlapping masks
    masks_array = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]), 'uint8')

    # Decode each mask into a np.array, and place it into the overall array for the whole frame.
    # Since masks with the lowest score are placed first, they are 'partially overridden' by masks with a higher score
    # if they overlap.
    for i, mask in enumerate(coco_masks[::-1]):
        masks_array[mask_utils.decode(mask).astype('bool')] = i + 1

    # Encode the resulting np.array back into a set of coco_masks which are now non-overlapping.
    num_dets = len(coco_masks)
    for i, j in enumerate(range(1, num_dets + 1)[::-1]):
        coco_masks[i] = mask_utils.encode(np.asfortranarray(masks_array == j, dtype=np.uint8))

    # Convert from coco_mask back into our mask_rle format.
    t_data['mask_rles'] = [m['counts'].decode("utf-8") for m in coco_masks]

    return t_data
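
# Layer-order sketch: masks are painted lowest score first, so pixels of
# higher-scoring masks overwrite the overlap; for two masks with scores
# 0.9 and 0.4 sharing a region, the 0.4 mask loses exactly the shared pixels
# and keeps everything else.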
def masks2boxes(mask_rles, im_hs, im_ws):
    """ Extracts the bounding boxes which surround a set of masks. """
    coco_masks = create_coco_mask(mask_rles, im_hs, im_ws)
    boxes = np.array([mask_utils.toBbox(x) for x in coco_masks])
    if len(boxes) == 0:
        boxes = np.empty((0, 4))
    return boxes


def box_iou(bboxes1, bboxes2, box_format='xywh', do_ioa=False, do_giou=False):
    """ Calculates the IoU (intersection over union) between two arrays of boxes.
    Allows variable box formats ('xywh' and 'x0y0x1y1').
    If do_ioa (intersection over area), then calculates the intersection over the area of bboxes1 - this is commonly
    used to determine if detections are within a crowd ignore region.
    If do_giou (generalized intersection over union), then calculates GIoU.
    """
    if len(bboxes1) == 0 or len(bboxes2) == 0:
        ious = np.zeros((len(bboxes1), len(bboxes2)))
        return ious
    if box_format == 'xywh':
        # layout: (x0, y0, w, h)
        bboxes1 = deepcopy(bboxes1)
        bboxes2 = deepcopy(bboxes2)

        bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
        bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
        bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
        bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
    elif box_format != 'x0y0x1y1':
        raise (Exception('box_format %s is not implemented' % box_format))

    # layout: (x0, y0, x1, y1)
    min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
    max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
    intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])

    if do_ioa:
        ioas = np.zeros_like(intersection)
        valid_mask = area1 > 0 + np.finfo('float').eps
        ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]

        return ioas
    else:
        area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
        intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
        intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
        intersection[union <= 0 + np.finfo('float').eps] = 0
        union[union <= 0 + np.finfo('float').eps] = 1
        ious = intersection / union

        if do_giou:
            enclosing_area = np.maximum(max_[..., 2] - min_[..., 0], 0) * np.maximum(max_[..., 3] - min_[..., 1], 0)
            eps = 1e-7
            # giou
            ious = ious - ((enclosing_area - union) / (enclosing_area + eps))

        return ious
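
# Worked example (hypothetical boxes): (0, 0, 10, 10) and (5, 0, 10, 10) in
# xywh format intersect in 50 px with a union of 150 px, so box_iou returns
# 1/3; their enclosing box is also 150 px, so the GIoU correction term is 0
# and do_giou=True gives the same value for this pair.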
def match(match_scores):
    match_rows, match_cols = linear_sum_assignment(-match_scores)
    return match_rows, match_cols


def write_seq(output_data, out_file):
    out_loc = os.path.dirname(out_file)
    if not os.path.exists(out_loc):
        os.makedirs(out_loc, exist_ok=True)
    fp = open(out_file, 'w', newline='')
    writer = csv.writer(fp, delimiter=' ')
    for row in output_data:
        writer.writerow(row)
    fp.close()


def combine_classes(data):
    """ Converts data from a class-separated to a class-combined format.
    Input format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    Output format: data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
    """
    output_data = [{} for _ in list(data.values())[0]]
    for cls, cls_data in data.items():
        for timestep, t_data in enumerate(cls_data):
            for k in t_data.keys():
                if k in output_data[timestep].keys():
                    output_data[timestep][k] += list(t_data[k])
                else:
                    output_data[timestep][k] = list(t_data[k])
            # one class label per id of this class at this timestep
            if 'cls' in output_data[timestep].keys():
                output_data[timestep]['cls'] += [cls] * len(t_data['ids'])
            else:
                output_data[timestep]['cls'] = [cls] * len(t_data['ids'])

    for timestep, t_data in enumerate(output_data):
        for k in t_data.keys():
            output_data[timestep][k] = np.array(output_data[timestep][k])

    return output_data


def save_as_png(t_data, out_file, im_h, im_w):
    """ Save a set of segmentation masks into a PNG format, the same as used for the DAVIS dataset. """

    if len(t_data['mask_rles']) > 0:
        coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])

        list_of_np_masks = [mask_utils.decode(mask) for mask in coco_masks]

        png = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]))
        for mask, c_id in zip(list_of_np_masks, t_data['ids']):
            png[mask.astype("bool")] = c_id + 1
    else:
        png = np.zeros((im_h, im_w))

    if not os.path.exists(os.path.dirname(out_file)):
        os.makedirs(os.path.dirname(out_file))

    colmap = (np.array(pascal_colormap) * 255).round().astype("uint8")
    palimage = Image.new('P', (16, 16))
    palimage.putpalette(colmap)
    im = Image.fromarray(np.squeeze(png.astype("uint8")))
    im2 = im.quantize(palette=palimage)
    im2.save(out_file)


def get_frame_size(data):
    """ Gets the frame height and width from the data. """
    for cls, cls_data in data.items():
        for timestep, t_data in enumerate(cls_data):
            if len(t_data['im_hs']) > 0:
                im_h = t_data['im_hs'][0]
                im_w = t_data['im_ws'][0]
                return im_h, im_w
    return None
@@ -0,0 +1,92 @@
"""
Non-Overlap: Code to take in a set of raw detections and produce a set of non-overlapping detections from it.

Author: Jonathon Luiten
"""

import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path

code_path = get_code_path()
config = {
    'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/raw_supplied/data/'),
    'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
    'SPLIT': 'train',  # valid: 'train', 'val', 'test'.
    'Benchmarks': None,  # If None, all benchmarks in SPLIT.

    'Num_Parallel_Cores': None,  # If None, run without parallel.

    'THRESHOLD_NMS_MASK_IOU': 0.5,
}


def do_sequence(seq_file):

    # Load input data from file (e.g. provided detections)
    # data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    data = butils.load_seq(seq_file)

    # Converts data from a class-separated to a class-combined format.
    # data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
    data = butils.combine_classes(data)

    # Where to accumulate output data for writing out
    output_data = []

    # Run for each timestep.
    for timestep, t_data in enumerate(data):

        # Remove redundant masks by performing non-maximum suppression (NMS)
        t_data = butils.mask_NMS(t_data, nms_threshold=config['THRESHOLD_NMS_MASK_IOU'])

        # Perform non-overlap, to get non-overlapping masks.
        t_data = butils.non_overlap(t_data, already_sorted=True)

        # Save result in output format to write to file later.
        # Output Format = [timestep ID class score im_h im_w mask_RLE]
        for i in range(len(t_data['ids'])):
            row = [timestep, int(t_data['ids'][i]), t_data['cls'][i], t_data['scores'][i], t_data['im_hs'][i],
                   t_data['im_ws'][i], t_data['mask_rles'][i]]
            output_data.append(row)

    # Write results to file
    out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
                                config['OUTPUT_FOL'].format(split=config['SPLIT']))
    butils.write_seq(output_data, out_file)

    print('DONE:', seq_file)


if __name__ == '__main__':

    # Required to fix a bug in multiprocessing on Windows.
    freeze_support()

    # Obtain the list of sequences to run the tracker on.
    if config['Benchmarks']:
        benchmarks = config['Benchmarks']
    else:
        benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
        if config['SPLIT'] != 'train':
            benchmarks += ['waymo', 'mots_challenge']
    seqs_todo = []
    for bench in benchmarks:
        bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
        seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]

    # Run in parallel
    if config['Num_Parallel_Cores']:
        with Pool(config['Num_Parallel_Cores']) as pool:
            results = pool.map(do_sequence, seqs_todo)

    # Run in series
    else:
        for seq_todo in seqs_todo:
            do_sequence(seq_todo)
@@ -0,0 +1,257 @@
# PASCAL VOC colormap: 256 colours, one (R, G, B) triple per colour, values in [0, 1].
pascal_colormap = [
    0, 0, 0,  0.5020, 0, 0,  0, 0.5020, 0,  0.5020, 0.5020, 0,
    0, 0, 0.5020,  0.5020, 0, 0.5020,  0, 0.5020, 0.5020,  0.5020, 0.5020, 0.5020,
    0.2510, 0, 0,  0.7529, 0, 0,  0.2510, 0.5020, 0,  0.7529, 0.5020, 0,
    0.2510, 0, 0.5020,  0.7529, 0, 0.5020,  0.2510, 0.5020, 0.5020,  0.7529, 0.5020, 0.5020,
    0, 0.2510, 0,  0.5020, 0.2510, 0,  0, 0.7529, 0,  0.5020, 0.7529, 0,
    0, 0.2510, 0.5020,  0.5020, 0.2510, 0.5020,  0, 0.7529, 0.5020,  0.5020, 0.7529, 0.5020,
    0.2510, 0.2510, 0,  0.7529, 0.2510, 0,  0.2510, 0.7529, 0,  0.7529, 0.7529, 0,
    0.2510, 0.2510, 0.5020,  0.7529, 0.2510, 0.5020,  0.2510, 0.7529, 0.5020,  0.7529, 0.7529, 0.5020,
    0, 0, 0.2510,  0.5020, 0, 0.2510,  0, 0.5020, 0.2510,  0.5020, 0.5020, 0.2510,
    0, 0, 0.7529,  0.5020, 0, 0.7529,  0, 0.5020, 0.7529,  0.5020, 0.5020, 0.7529,
    0.2510, 0, 0.2510,  0.7529, 0, 0.2510,  0.2510, 0.5020, 0.2510,  0.7529, 0.5020, 0.2510,
    0.2510, 0, 0.7529,  0.7529, 0, 0.7529,  0.2510, 0.5020, 0.7529,  0.7529, 0.5020, 0.7529,
    0, 0.2510, 0.2510,  0.5020, 0.2510, 0.2510,  0, 0.7529, 0.2510,  0.5020, 0.7529, 0.2510,
    0, 0.2510, 0.7529,  0.5020, 0.2510, 0.7529,  0, 0.7529, 0.7529,  0.5020, 0.7529, 0.7529,
    0.2510, 0.2510, 0.2510,  0.7529, 0.2510, 0.2510,  0.2510, 0.7529, 0.2510,  0.7529, 0.7529, 0.2510,
    0.2510, 0.2510, 0.7529,  0.7529, 0.2510, 0.7529,  0.2510, 0.7529, 0.7529,  0.7529, 0.7529, 0.7529,
    0.1255, 0, 0,  0.6275, 0, 0,  0.1255, 0.5020, 0,  0.6275, 0.5020, 0,
    0.1255, 0, 0.5020,  0.6275, 0, 0.5020,  0.1255, 0.5020, 0.5020,  0.6275, 0.5020, 0.5020,
    0.3765, 0, 0,  0.8784, 0, 0,  0.3765, 0.5020, 0,  0.8784, 0.5020, 0,
    0.3765, 0, 0.5020,  0.8784, 0, 0.5020,  0.3765, 0.5020, 0.5020,  0.8784, 0.5020, 0.5020,
    0.1255, 0.2510, 0,  0.6275, 0.2510, 0,  0.1255, 0.7529, 0,  0.6275, 0.7529, 0,
    0.1255, 0.2510, 0.5020,  0.6275, 0.2510, 0.5020,  0.1255, 0.7529, 0.5020,  0.6275, 0.7529, 0.5020,
    0.3765, 0.2510, 0,  0.8784, 0.2510, 0,  0.3765, 0.7529, 0,  0.8784, 0.7529, 0,
    0.3765, 0.2510, 0.5020,  0.8784, 0.2510, 0.5020,  0.3765, 0.7529, 0.5020,  0.8784, 0.7529, 0.5020,
    0.1255, 0, 0.2510,  0.6275, 0, 0.2510,  0.1255, 0.5020, 0.2510,  0.6275, 0.5020, 0.2510,
    0.1255, 0, 0.7529,  0.6275, 0, 0.7529,  0.1255, 0.5020, 0.7529,  0.6275, 0.5020, 0.7529,
    0.3765, 0, 0.2510,  0.8784, 0, 0.2510,  0.3765, 0.5020, 0.2510,  0.8784, 0.5020, 0.2510,
    0.3765, 0, 0.7529,  0.8784, 0, 0.7529,  0.3765, 0.5020, 0.7529,  0.8784, 0.5020, 0.7529,
    0.1255, 0.2510, 0.2510,  0.6275, 0.2510, 0.2510,  0.1255, 0.7529, 0.2510,  0.6275, 0.7529, 0.2510,
    0.1255, 0.2510, 0.7529,  0.6275, 0.2510, 0.7529,  0.1255, 0.7529, 0.7529,  0.6275, 0.7529, 0.7529,
    0.3765, 0.2510, 0.2510,  0.8784, 0.2510, 0.2510,  0.3765, 0.7529, 0.2510,  0.8784, 0.7529, 0.2510,
    0.3765, 0.2510, 0.7529,  0.8784, 0.2510, 0.7529,  0.3765, 0.7529, 0.7529,  0.8784, 0.7529, 0.7529,
    0, 0.1255, 0,  0.5020, 0.1255, 0,  0, 0.6275, 0,  0.5020, 0.6275, 0,
    0, 0.1255, 0.5020,  0.5020, 0.1255, 0.5020,  0, 0.6275, 0.5020,  0.5020, 0.6275, 0.5020,
    0.2510, 0.1255, 0,  0.7529, 0.1255, 0,  0.2510, 0.6275, 0,  0.7529, 0.6275, 0,
    0.2510, 0.1255, 0.5020,  0.7529, 0.1255, 0.5020,  0.2510, 0.6275, 0.5020,  0.7529, 0.6275, 0.5020,
    0, 0.3765, 0,  0.5020, 0.3765, 0,  0, 0.8784, 0,  0.5020, 0.8784, 0,
    0, 0.3765, 0.5020,  0.5020, 0.3765, 0.5020,  0, 0.8784, 0.5020,  0.5020, 0.8784, 0.5020,
    0.2510, 0.3765, 0,  0.7529, 0.3765, 0,  0.2510, 0.8784, 0,  0.7529, 0.8784, 0,
    0.2510, 0.3765, 0.5020,  0.7529, 0.3765, 0.5020,  0.2510, 0.8784, 0.5020,  0.7529, 0.8784, 0.5020,
    0, 0.1255, 0.2510,  0.5020, 0.1255, 0.2510,  0, 0.6275, 0.2510,  0.5020, 0.6275, 0.2510,
    0, 0.1255, 0.7529,  0.5020, 0.1255, 0.7529,  0, 0.6275, 0.7529,  0.5020, 0.6275, 0.7529,
    0.2510, 0.1255, 0.2510,  0.7529, 0.1255, 0.2510,  0.2510, 0.6275, 0.2510,  0.7529, 0.6275, 0.2510,
    0.2510, 0.1255, 0.7529,  0.7529, 0.1255, 0.7529,  0.2510, 0.6275, 0.7529,  0.7529, 0.6275, 0.7529,
    0, 0.3765, 0.2510,  0.5020, 0.3765, 0.2510,  0, 0.8784, 0.2510,  0.5020, 0.8784, 0.2510,
    0, 0.3765, 0.7529,  0.5020, 0.3765, 0.7529,  0, 0.8784, 0.7529,  0.5020, 0.8784, 0.7529,
    0.2510, 0.3765, 0.2510,  0.7529, 0.3765, 0.2510,  0.2510, 0.8784, 0.2510,  0.7529, 0.8784, 0.2510,
    0.2510, 0.3765, 0.7529,  0.7529, 0.3765, 0.7529,  0.2510, 0.8784, 0.7529,  0.7529, 0.8784, 0.7529,
    0.1255, 0.1255, 0,  0.6275, 0.1255, 0,  0.1255, 0.6275, 0,  0.6275, 0.6275, 0,
    0.1255, 0.1255, 0.5020,  0.6275, 0.1255, 0.5020,  0.1255, 0.6275, 0.5020,  0.6275, 0.6275, 0.5020,
    0.3765, 0.1255, 0,  0.8784, 0.1255, 0,  0.3765, 0.6275, 0,  0.8784, 0.6275, 0,
    0.3765, 0.1255, 0.5020,  0.8784, 0.1255, 0.5020,  0.3765, 0.6275, 0.5020,  0.8784, 0.6275, 0.5020,
    0.1255, 0.3765, 0,  0.6275, 0.3765, 0,  0.1255, 0.8784, 0,  0.6275, 0.8784, 0,
    0.1255, 0.3765, 0.5020,  0.6275, 0.3765, 0.5020,  0.1255, 0.8784, 0.5020,  0.6275, 0.8784, 0.5020,
    0.3765, 0.3765, 0,  0.8784, 0.3765, 0,  0.3765, 0.8784, 0,  0.8784, 0.8784, 0,
    0.3765, 0.3765, 0.5020,  0.8784, 0.3765, 0.5020,  0.3765, 0.8784, 0.5020,  0.8784, 0.8784, 0.5020,
    0.1255, 0.1255, 0.2510,  0.6275, 0.1255, 0.2510,  0.1255, 0.6275, 0.2510,  0.6275, 0.6275, 0.2510,
    0.1255, 0.1255, 0.7529,  0.6275, 0.1255, 0.7529,  0.1255, 0.6275, 0.7529,  0.6275, 0.6275, 0.7529,
    0.3765, 0.1255, 0.2510,  0.8784, 0.1255, 0.2510,  0.3765, 0.6275, 0.2510,  0.8784, 0.6275, 0.2510,
    0.3765, 0.1255, 0.7529,  0.8784, 0.1255, 0.7529,  0.3765, 0.6275, 0.7529,  0.8784, 0.6275, 0.7529,
    0.1255, 0.3765, 0.2510,  0.6275, 0.3765, 0.2510,  0.1255, 0.8784, 0.2510,  0.6275, 0.8784, 0.2510,
    0.1255, 0.3765, 0.7529,  0.6275, 0.3765, 0.7529,  0.1255, 0.8784, 0.7529,  0.6275, 0.8784, 0.7529,
    0.3765, 0.3765, 0.2510,  0.8784, 0.3765, 0.2510,  0.3765, 0.8784, 0.2510,  0.8784, 0.8784, 0.2510,
    0.3765, 0.3765, 0.7529,  0.8784, 0.3765, 0.7529,  0.3765, 0.8784, 0.7529,  0.8784, 0.8784, 0.7529]
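Note: one plausible way to consume this flat list is to reshape it into a (256, 3) array and index it with instance ids modulo 256. The sketch below is purely illustrative (the id mask is invented) and is not a claim about how the repository's own PNG writer works:

import numpy as np

colormap = np.asarray(pascal_colormap, dtype=float).reshape(-1, 3)  # (256, 3), values in [0, 1]

# Hypothetical (H, W) integer id mask; id 0 maps to black, colours wrap after 256 distinct ids.
id_mask = np.array([[0, 1],
                    [2, 255]])
rgb = (colormap[id_mask % 256] * 255).astype(np.uint8)  # (H, W, 3) 8-bit RGB image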
144
test/yolov7-tracker/tracker/trackeval/baselines/stp.py
Normal file
@@ -0,0 +1,144 @@
"""
STP: Simplest Tracker Possible

Author: Jonathon Luiten

This simple tracker assigns track IDs which maximise the 'bounding box IoU' between previous tracks and current
detections. It is also able to match detections to tracks at more than one timestep previously.
"""

import os
import sys
import numpy as np
from multiprocessing.pool import Pool
from multiprocessing import freeze_support

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path

code_path = get_code_path()
config = {
    'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
    'OUTPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/'),
    'SPLIT': 'train',  # valid: 'train', 'val', 'test'.
    'Benchmarks': None,  # If None, all benchmarks in SPLIT.

    'Num_Parallel_Cores': None,  # If None, run without parallel.

    'DETECTION_THRESHOLD': 0.5,
    'ASSOCIATION_THRESHOLD': 1e-10,
    'MAX_FRAMES_SKIP': 7
}


def track_sequence(seq_file):

    # Load input data from file (e.g. provided detections)
    # data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    data = butils.load_seq(seq_file)

    # Where to accumulate output data for writing out
    output_data = []

    # To ensure IDs are unique per object across all classes.
    curr_max_id = 0

    # Run tracker for each class.
    for cls, cls_data in data.items():

        # Initialize container for holding previously tracked objects.
        prev = {'boxes': np.empty((0, 4)),
                'ids': np.array([], int),
                'timesteps': np.array([])}

        # Run tracker for each timestep.
        for timestep, t_data in enumerate(cls_data):

            # Threshold detections.
            t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])

            # Convert mask dets to bounding boxes.
            boxes = butils.masks2boxes(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])

            # Calculate IoU between previous and current frame dets.
            ious = butils.box_iou(prev['boxes'], boxes)

            # Score which decreases quickly for previous dets depending on how many timesteps ago they were last seen.
            prev_timestep_scores = np.power(10, -1 * prev['timesteps'])

            # Matching score is such that it first tries to match 'most recent timesteps',
            # and within each timestep maximises IoU.
            match_scores = prev_timestep_scores[:, np.newaxis] * ious

            # Find best matching between current dets and previous tracks.
            match_rows, match_cols = butils.match(match_scores)

            # Remove matches that have an IoU below a certain threshold.
            actually_matched_mask = ious[match_rows, match_cols] > config['ASSOCIATION_THRESHOLD']
            match_rows = match_rows[actually_matched_mask]
            match_cols = match_cols[actually_matched_mask]

            # Assign the prev track ID to the current dets if they were matched.
            ids = np.nan * np.ones((len(boxes),))
            ids[match_cols] = prev['ids'][match_rows]

            # Create new track IDs for dets that were not matched to previous tracks.
            num_not_matched = len(ids) - len(match_cols)
            new_ids = np.arange(curr_max_id + 1, curr_max_id + num_not_matched + 1)
            ids[np.isnan(ids)] = new_ids

            # Update maximum ID to ensure future added tracks have a unique ID value.
            curr_max_id += num_not_matched

            # Drop tracks from 'previous tracks' if they have not been matched in the last MAX_FRAMES_SKIP frames.
            unmatched_rows = [i for i in range(len(prev['ids'])) if
                              i not in match_rows and (prev['timesteps'][i] + 1 <= config['MAX_FRAMES_SKIP'])]

            # Update the set of previous tracking results to include the newly tracked detections.
            prev['ids'] = np.concatenate((ids, prev['ids'][unmatched_rows]), axis=0)
            prev['boxes'] = np.concatenate((np.atleast_2d(boxes), np.atleast_2d(prev['boxes'][unmatched_rows])), axis=0)
            prev['timesteps'] = np.concatenate((np.zeros((len(ids),)), prev['timesteps'][unmatched_rows] + 1), axis=0)

            # Save result in output format to write to file later.
            # Output Format = [timestep ID class score im_h im_w mask_RLE]
            for i in range(len(t_data['ids'])):
                row = [timestep, int(ids[i]), cls, t_data['scores'][i], t_data['im_hs'][i], t_data['im_ws'][i],
                       t_data['mask_rles'][i]]
                output_data.append(row)

    # Write results to file
    out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
                                config['OUTPUT_FOL'].format(split=config['SPLIT']))
    butils.write_seq(output_data, out_file)

    print('DONE:', seq_file)


if __name__ == '__main__':

    # Required to fix bug in multiprocessing on Windows.
    freeze_support()

    # Obtain list of sequences to run tracker for.
    if config['Benchmarks']:
        benchmarks = config['Benchmarks']
    else:
        benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
        if config['SPLIT'] != 'train':
            benchmarks += ['waymo', 'mots_challenge']
    seqs_todo = []
    for bench in benchmarks:
        bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
        seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]

    # Run in parallel
    if config['Num_Parallel_Cores']:
        with Pool(config['Num_Parallel_Cores']) as pool:
            results = pool.map(track_sequence, seqs_todo)

    # Run in series
    else:
        for seq_todo in seqs_todo:
            track_sequence(seq_todo)
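Note: to see why the 10**-t weighting in track_sequence makes recency dominate raw IoU in the matching score, here is a small numeric sketch (all values invented for illustration):

import numpy as np

# Two previous tracks: one seen last frame (timestep 1), one three frames ago (timestep 3).
prev_timesteps = np.array([1., 3.])
# IoU of each previous track (rows) with two current detections (cols).
ious = np.array([[0.30, 0.00],
                 [0.90, 0.10]])

prev_timestep_scores = np.power(10, -1 * prev_timesteps)   # [0.1, 0.001]
match_scores = prev_timestep_scores[:, np.newaxis] * ious
print(match_scores)
# [[0.03   0.    ]
#  [0.0009 0.0001]]
# The recent track wins detection 0 (0.03 > 0.0009) even though the older track has higher IoU.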
@@ -0,0 +1,92 @@
"""
Thresholder

Author: Jonathon Luiten

Simply reads in a set of detections, thresholds them at a given score threshold, and writes them out again.
"""

import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path

THRESHOLD = 0.2

code_path = get_code_path()
config = {
    'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
    'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/threshold_' + str(100*THRESHOLD) + '/data/'),
    'SPLIT': 'train',  # valid: 'train', 'val', 'test'.
    'Benchmarks': None,  # If None, all benchmarks in SPLIT.

    'Num_Parallel_Cores': None,  # If None, run without parallel.

    'DETECTION_THRESHOLD': THRESHOLD,
}


def do_sequence(seq_file):

    # Load input data from file (e.g. provided detections)
    # data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    data = butils.load_seq(seq_file)

    # Where to accumulate output data for writing out
    output_data = []

    # Run for each class.
    for cls, cls_data in data.items():

        # Run for each timestep.
        for timestep, t_data in enumerate(cls_data):

            # Threshold detections.
            t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])

            # Save result in output format to write to file later.
            # Output Format = [timestep ID class score im_h im_w mask_RLE]
            for i in range(len(t_data['ids'])):
                row = [timestep, int(t_data['ids'][i]), cls, t_data['scores'][i], t_data['im_hs'][i],
                       t_data['im_ws'][i], t_data['mask_rles'][i]]
                output_data.append(row)

    # Write results to file
    out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
                                config['OUTPUT_FOL'].format(split=config['SPLIT']))
    butils.write_seq(output_data, out_file)

    print('DONE:', seq_file)


if __name__ == '__main__':

    # Required to fix bug in multiprocessing on Windows.
    freeze_support()

    # Obtain list of sequences to run tracker for.
    if config['Benchmarks']:
        benchmarks = config['Benchmarks']
    else:
        benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
        if config['SPLIT'] != 'train':
            benchmarks += ['waymo', 'mots_challenge']
    seqs_todo = []
    for bench in benchmarks:
        bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
        seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]

    # Run in parallel
    if config['Num_Parallel_Cores']:
        with Pool(config['Num_Parallel_Cores']) as pool:
            results = pool.map(do_sequence, seqs_todo)

    # Run in series
    else:
        for seq_todo in seqs_todo:
            do_sequence(seq_todo)
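Note: the output path above is derived purely by substring replacement of the input folder with the output folder. A quick sketch with made-up paths (note that str(100*THRESHOLD) yields '20.0', so the output folder is named 'threshold_20.0'):

in_fol = '/code/data/detections/rob_mots/train/non_overlap_supplied/data/'
out_fol = '/code/data/detections/rob_mots/train/threshold_20.0/data/'
seq_file = in_fol + 'kitti_mots/0002.txt'
out_file = seq_file.replace(in_fol, out_fol)
print(out_file)  # /code/data/detections/rob_mots/train/threshold_20.0/data/kitti_mots/0002.txt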
94
test/yolov7-tracker/tracker/trackeval/baselines/vizualize.py
Normal file
@@ -0,0 +1,94 @@
"""
Vizualize: Code which converts .txt rle tracking results into a visual .png format.

Author: Jonathon Luiten
"""

import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
from trackeval.datasets.rob_mots_classmap import cls_id_to_name

code_path = get_code_path()
config = {
    # Tracker format:
    'INPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/{bench}'),
    'OUTPUT_FOL': os.path.join(code_path, 'data/viz/rob_mots/{split}/STP/data/{bench}'),
    # GT format:
    # 'INPUT_FOL': os.path.join(code_path, 'data/gt/rob_mots/{split}/{bench}/data/'),
    # 'OUTPUT_FOL': os.path.join(code_path, 'data/gt_viz/rob_mots/{split}/{bench}/'),
    'SPLIT': 'train',  # valid: 'train', 'val', 'test'.
    'Benchmarks': None,  # If None, all benchmarks in SPLIT.
    'Num_Parallel_Cores': None,  # If None, run without parallel.
}


def do_sequence(seq_file):
    # Recover the benchmark name from the sequence path (the default INPUT_FOL ends in '{bench}'), so this
    # also works in a parallel pool, where the global 'bench' from the __main__ loop is not available.
    bench = os.path.basename(os.path.dirname(seq_file))

    # Folder to save resulting visualization in
    out_fol = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench),
                               config['OUTPUT_FOL'].format(split=config['SPLIT'], bench=bench)).replace('.txt', '')

    # Load input data from file (e.g. provided detections)
    # data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
    data = butils.load_seq(seq_file)

    # Get frame size for visualizing empty frames
    im_h, im_w = butils.get_frame_size(data)

    # First run for each class.
    for cls, cls_data in data.items():

        if cls >= 100:
            continue

        # Run for each timestep.
        for timestep, t_data in enumerate(cls_data):
            # Save out visualization
            out_file = os.path.join(out_fol, cls_id_to_name[cls], str(timestep).zfill(5) + '.png')
            butils.save_as_png(t_data, out_file, im_h, im_w)

    # Then run for all classes combined
    # Converts data from a class-separated to a class-combined format.
    data = butils.combine_classes(data)

    # Run for each timestep.
    for timestep, t_data in enumerate(data):
        # Save out visualization
        out_file = os.path.join(out_fol, 'all_classes', str(timestep).zfill(5) + '.png')
        butils.save_as_png(t_data, out_file, im_h, im_w)

    print('DONE:', seq_file)


if __name__ == '__main__':

    # Required to fix bug in multiprocessing on Windows.
    freeze_support()

    # Obtain list of sequences to run tracker for.
    if config['Benchmarks']:
        benchmarks = config['Benchmarks']
    else:
        benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
        if config['SPLIT'] != 'train':
            benchmarks += ['waymo', 'mots_challenge']
    seqs_todo = []
    for bench in benchmarks:
        bench_fol = config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench)
        seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]

    # Run in parallel
    if config['Num_Parallel_Cores']:
        with Pool(config['Num_Parallel_Cores']) as pool:
            results = pool.map(do_sequence, seqs_todo)

    # Run in series
    else:
        for seq_todo in seqs_todo:
            do_sequence(seq_todo)
15
test/yolov7-tracker/tracker/trackeval/datasets/__init__.py
Normal file
@@ -0,0 +1,15 @@
from .kitti_2d_box import Kitti2DBox
from .kitti_mots import KittiMOTS
from .mot_challenge_2d_box import MotChallenge2DBox
from .mots_challenge import MOTSChallenge
from .bdd100k import BDD100K
from .davis import DAVIS
from .tao import TAO
from .tao_ow import TAO_OW
from .burst import BURST
from .burst_ow import BURST_OW
from .youtube_vis import YouTubeVIS
from .head_tracking_challenge import HeadTrackingChallenge
from .rob_mots import RobMOTS
from .person_path_22 import PersonPath22
from .visdrone import VisDrone2DBox
326
test/yolov7-tracker/tracker/trackeval/datasets/_base_dataset.py
Normal file
@@ -0,0 +1,326 @@
import csv
import io
import zipfile
import os
import traceback
import numpy as np
from copy import deepcopy
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException


class _BaseDataset(ABC):
    @abstractmethod
    def __init__(self):
        self.tracker_list = None
        self.seq_list = None
        self.class_list = None
        self.output_fol = None
        self.output_sub_fol = None
        self.should_classes_combine = True
        self.use_super_categories = False

    # Functions to implement:

    @staticmethod
    @abstractmethod
    def get_default_dataset_config():
        ...

    @abstractmethod
    def _load_raw_file(self, tracker, seq, is_gt):
        ...

    @_timing.time
    @abstractmethod
    def get_preprocessed_seq_data(self, raw_data, cls):
        ...

    @abstractmethod
    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        ...

    # Helper functions for all datasets:

    @classmethod
    def get_class_name(cls):
        return cls.__name__

    def get_name(self):
        return self.get_class_name()

    def get_output_fol(self, tracker):
        return os.path.join(self.output_fol, tracker, self.output_sub_fol)

    def get_display_name(self, tracker):
        """ Can be overwritten if the tracker's name (in files) is different to how it should be displayed.
        By default this method just returns the tracker's name as is.
        """
        return tracker

    def get_eval_info(self):
        """Return info about the dataset needed for the Evaluator"""
        return self.tracker_list, self.seq_list, self.class_list

    @_timing.time
    def get_raw_seq_data(self, tracker, seq):
        """ Loads raw data (tracker and ground-truth) for a single tracker on a single sequence.
        Raw data includes all of the information needed for both preprocessing and evaluation, for all classes.
        A later function (get_preprocessed_seq_data) will perform such preprocessing and extract relevant information
        for the evaluation of each class.

        This returns a dict which contains the fields:
        [num_timesteps]: integer
        [gt_ids, tracker_ids, gt_classes, tracker_classes, tracker_confidences]:
                                                                list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, tracker_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
        [similarity_scores]: list (for each timestep) of 2D NDArrays.
        [gt_extras]: dict (for each extra) of lists (for each timestep) of 1D NDArrays (for each det).

        gt_extras contains dataset specific information used for preprocessing such as occlusion and truncation levels.

        Note that similarities are extracted as part of the dataset and not the metric, because almost all metrics are
        independent of the exact method of calculating the similarity. However datasets are not (e.g. segmentation
        masks vs 2D boxes vs 3D boxes).
        We calculate the similarity before preprocessing because often both preprocessing and evaluation require it and
        we don't wish to calculate this twice.
        We calculate similarity between all gt and tracker classes (not just each class individually) to allow for
        calculation of metrics such as class confusion matrices. Typically the impact of this on performance is low.
        """
        # Load raw data.
        raw_gt_data = self._load_raw_file(tracker, seq, is_gt=True)
        raw_tracker_data = self._load_raw_file(tracker, seq, is_gt=False)
        raw_data = {**raw_tracker_data, **raw_gt_data}  # Merges dictionaries

        # Calculate similarities for each timestep.
        similarity_scores = []
        for t, (gt_dets_t, tracker_dets_t) in enumerate(zip(raw_data['gt_dets'], raw_data['tracker_dets'])):
            ious = self._calculate_similarities(gt_dets_t, tracker_dets_t)
            similarity_scores.append(ious)
        raw_data['similarity_scores'] = similarity_scores
        return raw_data

    @staticmethod
    def _load_simple_text_file(file, time_col=0, id_col=None, remove_negative_ids=False, valid_filter=None,
                               crowd_ignore_filter=None, convert_filter=None, is_zipped=False, zip_file=None,
                               force_delimiters=None):
        """ Function that loads data which is in a commonly used text file format.
        Assumes each det is given by one row of a text file.
        There is no limit to the number or meaning of each column,
        however one column needs to give the timestep of each det (time_col) which is default col 0.

        The file dialect (delimiter, num cols, etc.) is determined automatically.
        This function automatically separates dets by timestep,
        and is much faster than alternatives such as np.loadtxt or pandas.

        If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
        These are not excluded from ignore data.

        valid_filter can be used to only include certain classes.
        It is a dict with ints as keys, and lists as values,
        such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
        If None, all classes are included.

        crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as
        valid_filter.

        convert_filter can be used to convert values read to another format.
        This is used most commonly to convert classes given as strings to class ids.
        This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.

        Optionally, input files could be a zip of multiple text files for storage efficiency.

        Returns read_data and ignore_data.
        Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
        Note that all data is returned as strings, and must be converted to float/int later if needed.
        Note that timesteps will not be present in the returned dict keys if there are no dets for them.
        """

        if remove_negative_ids and id_col is None:
            raise TrackEvalException('remove_negative_ids is True, but id_col is not given.')
        if crowd_ignore_filter is None:
            crowd_ignore_filter = {}
        if convert_filter is None:
            convert_filter = {}
        try:
            if is_zipped:  # Either open file directly or within a zip.
                if zip_file is None:
                    raise TrackEvalException('is_zipped set to True, but no zip_file is given.')
                archive = zipfile.ZipFile(os.path.join(zip_file), 'r')
                fp = io.TextIOWrapper(archive.open(file, 'r'))
            else:
                fp = open(file)
            read_data = {}
            crowd_ignore_data = {}
            fp.seek(0, os.SEEK_END)
            # check if file is empty
            if fp.tell():
                fp.seek(0)
                dialect = csv.Sniffer().sniff(fp.readline(), delimiters=force_delimiters)  # Auto determine structure.
                dialect.skipinitialspace = True  # Deal with extra spaces between columns
                fp.seek(0)
                reader = csv.reader(fp, dialect)
                for row in reader:
                    try:
                        # Deal with extra trailing spaces at the end of rows
                        if row[-1] in '':
                            row = row[:-1]
                        timestep = str(int(float(row[time_col])))
                        # Read ignore regions separately.
                        is_ignored = False
                        for ignore_key, ignore_value in crowd_ignore_filter.items():
                            if row[ignore_key].lower() in ignore_value:
                                # Convert values in one column (e.g. string to id)
                                for convert_key, convert_value in convert_filter.items():
                                    row[convert_key] = convert_value[row[convert_key].lower()]
                                # Save data separated by timestep.
                                if timestep in crowd_ignore_data.keys():
                                    crowd_ignore_data[timestep].append(row)
                                else:
                                    crowd_ignore_data[timestep] = [row]
                                is_ignored = True
                        if is_ignored:  # if det is an ignore region, it cannot be a normal det.
                            continue
                        # Exclude some dets if not valid.
                        if valid_filter is not None:
                            for key, value in valid_filter.items():
                                if row[key].lower() not in value:
                                    continue
                        if remove_negative_ids:
                            if int(float(row[id_col])) < 0:
                                continue
                        # Convert values in one column (e.g. string to id)
                        for convert_key, convert_value in convert_filter.items():
                            row[convert_key] = convert_value[row[convert_key].lower()]
                        # Save data separated by timestep.
                        if timestep in read_data.keys():
                            read_data[timestep].append(row)
                        else:
                            read_data[timestep] = [row]
                    except Exception:
                        exc_str_init = 'In file %s the following line cannot be read correctly: \n' % os.path.basename(
                            file)
                        exc_str = ' '.join([exc_str_init] + row)
                        raise TrackEvalException(exc_str)
            fp.close()
        except Exception:
            print('Error loading file: %s, printing traceback.' % file)
            traceback.print_exc()
            raise TrackEvalException(
                'File %s cannot be read because it is either not present or invalidly formatted' % os.path.basename(
                    file))
        return read_data, crowd_ignore_data

    @staticmethod
    def _calculate_mask_ious(masks1, masks2, is_encoded=False, do_ioa=False):
        """ Calculates the IOU (intersection over union) between two arrays of segmentation masks.
        If is_encoded a run length encoding with pycocotools is assumed as input format, otherwise an input of numpy
        arrays of the shape (num_masks, height, width) is assumed and the encoding is performed.
        If do_ioa (intersection over area), then calculates the intersection over the area of masks1 - this is
        commonly used to determine if detections are within a crowd ignore region.
        :param masks1: first set of masks (numpy array of shape (num_masks, height, width) if not encoded,
                       else pycocotools rle encoded format)
        :param masks2: second set of masks (numpy array of shape (num_masks, height, width) if not encoded,
                       else pycocotools rle encoded format)
        :param is_encoded: whether the input is in pycocotools rle encoded format
        :param do_ioa: whether to perform IoA computation
        :return: the IoU/IoA scores
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        # use pycocotools for run length encoding of masks
        if not is_encoded:
            masks1 = mask_utils.encode(np.array(np.transpose(masks1, (1, 2, 0)), order='F'))
            masks2 = mask_utils.encode(np.array(np.transpose(masks2, (1, 2, 0)), order='F'))

        # use pycocotools for iou computation of rle encoded masks
        ious = mask_utils.iou(masks1, masks2, [do_ioa]*len(masks2))
        if len(masks1) == 0 or len(masks2) == 0:
            ious = np.asarray(ious).reshape(len(masks1), len(masks2))
        assert (ious >= 0 - np.finfo('float').eps).all()
        assert (ious <= 1 + np.finfo('float').eps).all()

        return ious

    @staticmethod
    def _calculate_box_ious(bboxes1, bboxes2, box_format='xywh', do_ioa=False):
        """ Calculates the IOU (intersection over union) between two arrays of boxes.
        Allows variable box formats ('xywh' and 'x0y0x1y1').
        If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is
        commonly used to determine if detections are within a crowd ignore region.
        """
        if box_format in 'xywh':
            # layout: (x0, y0, w, h)
            bboxes1 = deepcopy(bboxes1)
            bboxes2 = deepcopy(bboxes2)

            bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
            bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
            bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
            bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
        elif box_format not in 'x0y0x1y1':
            raise (TrackEvalException('box_format %s is not implemented' % box_format))

        # layout: (x0, y0, x1, y1)
        min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
        max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
        intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
        area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])

        if do_ioa:
            ioas = np.zeros_like(intersection)
            valid_mask = area1 > 0 + np.finfo('float').eps
            ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]

            return ioas
        else:
            area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
            union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
            intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
            intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
            intersection[union <= 0 + np.finfo('float').eps] = 0
            union[union <= 0 + np.finfo('float').eps] = 1
            ious = intersection / union
            return ious

    @staticmethod
    def _calculate_euclidean_similarity(dets1, dets2, zero_distance=2.0):
        """ Calculates the euclidean distance between two sets of detections, and then converts this into a similarity
        measure with values between 0 and 1 using the following formula: sim = max(0, 1 - dist/zero_distance).
        The default zero_distance of 2.0 corresponds to the default used in MOT15_3D, such that a 0.5 similarity
        threshold corresponds to a 1m distance threshold for TPs.
        """
        dist = np.linalg.norm(dets1[:, np.newaxis] - dets2[np.newaxis, :], axis=2)
        sim = np.maximum(0, 1 - dist/zero_distance)
        return sim

    @staticmethod
    def _check_unique_ids(data, after_preproc=False):
        """Check the requirement that the tracker_ids and gt_ids are unique per timestep"""
        gt_ids = data['gt_ids']
        tracker_ids = data['tracker_ids']
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(gt_ids, tracker_ids)):
            if len(tracker_ids_t) > 0:
                unique_ids, counts = np.unique(tracker_ids_t, return_counts=True)
                if np.max(counts) != 1:
                    duplicate_ids = unique_ids[counts > 1]
                    exc_str_init = 'Tracker predicts the same ID more than once in a single timestep ' \
                                   '(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
                    if after_preproc:
                        exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
                                        'so ids may not be as in file, and something seems wrong with preproc.'
                    exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
                    raise TrackEvalException(exc_str)
            if len(gt_ids_t) > 0:
                unique_ids, counts = np.unique(gt_ids_t, return_counts=True)
                if np.max(counts) != 1:
                    duplicate_ids = unique_ids[counts > 1]
                    exc_str_init = 'Ground-truth has the same ID more than once in a single timestep ' \
                                   '(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
                    if after_preproc:
                        exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
                                        'so ids may not be as in file, and something seems wrong with preproc.'
                    exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
                    raise TrackEvalException(exc_str)
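Note: as a sanity check on the broadcasting trick in _calculate_box_ious (elementwise min/max of the corner arrays, with the intersection read off the "crossed" halves), here is a self-contained worked example with invented boxes in 'x0y0x1y1' format:

import numpy as np

b1 = np.array([[0., 0., 10., 10.]])             # one gt box
b2 = np.array([[5., 5., 15., 15.],
               [20., 20., 30., 30.]])           # two tracker boxes

min_ = np.minimum(b1[:, np.newaxis, :], b2[np.newaxis, :, :])   # shape (1, 2, 4)
max_ = np.maximum(b1[:, np.newaxis, :], b2[np.newaxis, :, :])
# Overlap width/height = (min of the x1/y1 coords) - (max of the x0/y0 coords), clamped at 0.
inter = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
area1 = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1])   # [100.]
area2 = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1])   # [100., 100.]
union = area1[:, np.newaxis] + area2[np.newaxis, :] - inter
print(inter / union)   # [[0.14285714 0.]] -> IoU = 25/175 for the overlapping pair, 0 for the disjoint one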
302
test/yolov7-tracker/tracker/trackeval/datasets/bdd100k.py
Normal file
@@ -0,0 +1,302 @@
import os
import json
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing


class BDD100K(_BaseDataset):
    """Dataset class for BDD100K tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/bdd100k/bdd100k_val'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/bdd100k/bdd100k_val'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle'],
            # Valid: ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
            'SPLIT_TO_EVAL': 'val',  # Valid: 'training', 'val'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = True
        self.use_super_categories = True

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [pedestrian, rider, car, '
                                     'bus, truck, train, motorcycle, bicycle] are valid.')
        self.super_categories = {"HUMAN": [cls for cls in ["pedestrian", "rider"] if cls in self.class_list],
                                 "VEHICLE": [cls for cls in ["car", "truck", "bus", "train"] if cls in self.class_list],
                                 "BIKE": [cls for cls in ["motorcycle", "bicycle"] if cls in self.class_list]}
        self.distractor_classes = ['other person', 'trailer', 'other vehicle']
        self.class_name_to_class_id = {'pedestrian': 1, 'rider': 2, 'other person': 3, 'car': 4, 'bus': 5, 'truck': 6,
                                       'train': 7, 'trailer': 8, 'other vehicle': 9, 'motorcycle': 10, 'bicycle': 11}

        # Get sequences to eval
        self.seq_list = []
        self.seq_lengths = {}

        self.seq_list = [seq_file.replace('.json', '') for seq_file in os.listdir(self.gt_fol)]

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            for seq in self.seq_list:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException(
                        'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                            curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the BDD100K format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """
        # File location
        if is_gt:
            file = os.path.join(self.gt_fol, seq + '.json')
        else:
            file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')

        with open(file) as f:
            data = json.load(f)

        # sort data by frame index
        data = sorted(data, key=lambda x: x['index'])

        # check sequence length
        if is_gt:
            self.seq_lengths[seq] = len(data)
            num_timesteps = len(data)
        else:
            num_timesteps = self.seq_lengths[seq]
            if num_timesteps != len(data):
                raise TrackEvalException('Number of ground truth and tracker timesteps do not match for sequence %s'
                                         % seq)

        # Convert data to required format
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_crowd_ignore_regions']
        raw_data = {key: [None] * num_timesteps for key in data_keys}
        for t in range(num_timesteps):
            ig_ids = []
            keep_ids = []
            for i in range(len(data[t]['labels'])):
                ann = data[t]['labels'][i]
                if is_gt and (ann['category'] in self.distractor_classes or 'attributes' in ann.keys()
                              and ann['attributes']['Crowd']):
                    ig_ids.append(i)
                else:
                    keep_ids.append(i)

            if keep_ids:
                raw_data['dets'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
                                                      data[t]['labels'][i]['box2d']['y1'],
                                                      data[t]['labels'][i]['box2d']['x2'],
                                                      data[t]['labels'][i]['box2d']['y2']
                                                      ] for i in keep_ids]).astype(float)
                raw_data['ids'][t] = np.atleast_1d([data[t]['labels'][i]['id'] for i in keep_ids]).astype(int)
                raw_data['classes'][t] = np.atleast_1d([self.class_name_to_class_id[data[t]['labels'][i]['category']]
                                                        for i in keep_ids]).astype(int)
            else:
                raw_data['dets'][t] = np.empty((0, 4)).astype(float)
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)

            if is_gt:
                if ig_ids:
                    raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
                                                                             data[t]['labels'][i]['box2d']['y1'],
                                                                             data[t]['labels'][i]['box2d']['x2'],
                                                                             data[t]['labels'][i]['box2d']['y2']
                                                                             ] for i in ig_ids]).astype(float)
                else:
                    raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4)).astype(float)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
             - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
             - cls is the class to be evaluated.
        Outputs:
             - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        BDD100K:
            In BDD100K, the 4 preproc steps are as follows:
                1) There are eight classes (pedestrian, rider, car, bus, truck, train, motorcycle, bicycle)
                    which are evaluated separately.
                2) For BDD100K there is no removal of matched tracker dets.
                3) Crowd ignore regions are used to remove unmatched detections.
                4) No removal of gt dets.
        """
        cls_id = self.class_name_to_class_id[cls]

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = raw_data['gt_dets'][t][gt_class_mask]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with hungarian algorithm)
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_cols = match_cols[actually_matched_mask]
                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            # For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
            unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
            crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
            intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
                                                                       box_format='x0y0x1y1', do_ioa=True)
            is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps,
                                                   axis=1)

            # Apply preprocessing to remove unwanted tracker dets.
            to_remove_tracker = unmatched_indices[is_within_crowd_ignore_region]
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']

        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
        return similarity_scores
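Note: the Hungarian matching step in get_preprocessed_seq_data zeroes similarities below 0.5 before running linear_sum_assignment, so only confident pairs can survive; a minimal sketch with invented scores:

import numpy as np
from scipy.optimize import linear_sum_assignment

similarity_scores = np.array([[0.80, 0.40],    # gt 0 vs tracker 0 / tracker 1
                              [0.30, 0.55]])   # gt 1 vs tracker 0 / tracker 1
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0   # forbid sub-0.5 matches

match_rows, match_cols = linear_sum_assignment(-matching_scores)     # maximise total score
actually_matched = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
print(match_rows[actually_matched], match_cols[actually_matched])   # [0 1] [0 1]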
49
test/yolov7-tracker/tracker/trackeval/datasets/burst.py
Normal file
@@ -0,0 +1,49 @@
import os
from .burst_helpers.burst_base import BURSTBase
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils


class BURST(BURSTBase):
    """Dataset class for BURST tracking"""

    @staticmethod
    def get_default_dataset_config():
        tao_config = BURSTBase.get_default_dataset_config()
        code_path = utils.get_code_path()

        # e.g. 'data/gt/tsunami/exemplar_guided/'
        tao_config['GT_FOLDER'] = os.path.join(
            code_path, 'data/gt/burst/val/')  # Location of GT data
        # e.g. 'data/trackers/tsunami/exemplar_guided/mask_guided/validation/'
        tao_config['TRACKERS_FOLDER'] = os.path.join(
            code_path, 'data/trackers/burst/class-guided/')  # Trackers location
        # set to True or False
        tao_config['EXEMPLAR_GUIDED'] = False
        return tao_config

    def _iou_type(self):
        return 'mask'

    def _box_or_mask_from_det(self, det):
        return det['segmentation']

    def _calculate_area_for_ann(self, ann):
        import pycocotools.mask as cocomask
        return cocomask.area(ann["segmentation"])

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores

    def _is_exemplar_guided(self):
        exemplar_guided = self.config['EXEMPLAR_GUIDED']
        return exemplar_guided

    def _postproc_ground_truth_data(self, data):
        return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()

    def _postproc_prediction_data(self, data):
        return PredictionBURSTFormatToTAOFormatConverter(
            self.gt_data, data,
            exemplar_guided=self._is_exemplar_guided()).convert()
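Note: since BURST computes similarities on RLE-encoded masks (is_encoded=True), the underlying pycocotools call behaves as in this self-contained sketch (masks invented for illustration):

import numpy as np
from pycocotools import mask as mask_utils

m1 = np.zeros((2, 8, 8), dtype=np.uint8)
m1[0, :4, :4] = 1                      # gt mask 0: top-left 4x4 square; gt mask 1 stays empty
m2 = np.zeros((1, 8, 8), dtype=np.uint8)
m2[0, 2:6, 2:6] = 1                    # tracker mask: shifted 4x4 square

# pycocotools expects Fortran-ordered (h, w, n) uint8 arrays for encoding.
rles1 = mask_utils.encode(np.asfortranarray(m1.transpose(1, 2, 0)))
rles2 = mask_utils.encode(np.asfortranarray(m2.transpose(1, 2, 0)))
print(mask_utils.iou(rles1, rles2, [False] * len(rles2)))
# [[0.14285714]   -> 4 px overlap / 28 px union
#  [0.        ]]  -> empty mask has zero IoU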
@@ -0,0 +1,7 @@
The track ids in both ground truth and predictions are not globally unique, but
start from 1 for each video. At the moment, when converting from Ali format to
TAO format, we remap the ids to be globally unique. It would be better to
directly have this in the data, though.


Improve setting of the EXEMPLAR_GUIDED flag; maybe this can be done automatically.
@@ -0,0 +1,591 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from trackeval.utils import TrackEvalException
|
||||
from trackeval.datasets._base_dataset import _BaseDataset
|
||||
from trackeval import utils
|
||||
from trackeval import _timing
|
||||
|
||||
|
||||
class BURSTBase(_BaseDataset):
|
||||
"""Dataset class for TAO tracking"""
|
||||
|
||||
def _postproc_ground_truth_data(self, data):
|
||||
return data
|
||||
|
||||
def _postproc_prediction_data(self, data):
|
||||
return data
|
||||
|
||||
def _iou_type(self):
|
||||
return 'bbox'
|
||||
|
||||
def _box_or_mask_from_det(self, det):
|
||||
return np.atleast_1d(det['bbox'])
|
||||
|
||||
def _calculate_area_for_ann(self, ann):
|
||||
return ann["bbox"][2] * ann["bbox"][3]
|
||||
|
||||
@staticmethod
|
||||
def get_default_dataset_config():
|
||||
"""Default class config values"""
|
||||
code_path = utils.get_code_path()
|
||||
default_config = {
|
||||
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
|
||||
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
|
||||
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
|
||||
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
|
||||
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
|
||||
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
|
||||
'PRINT_CONFIG': True, # Whether to print current config
|
||||
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
|
||||
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
|
||||
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
|
||||
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
|
||||
'EXEMPLAR_GUIDED': False,
|
||||
}
|
||||
return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = True
        self.use_super_categories = False

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
        if len(gt_dir_files) != 1:
            raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')

        with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
            self.gt_data = self._postproc_ground_truth_data(json.load(f))

        # merge categories marked with a merged tag in TAO dataset
        self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])

        # Get sequences to eval and sequence information
        self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
        self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
        # compute mappings from videos to annotation data
        self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
        # compute sequence lengths
        self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
        for img in self.gt_data['images']:
            self.seq_lengths[img['video_id']] += 1
        self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
        self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
                                                                in self.videos_to_gt_tracks[vid['id']]}),
                                           'neg_cat_ids': vid['neg_category_ids'],
                                           'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
                               for vid in self.gt_data['videos']}

        # Get classes to eval
        considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
        seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
                         in self.seq_to_classes[vid_id]['pos_cat_ids']])
        # only classes with ground truth are evaluated in TAO; distractor classes are excluded as well.
        distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
                       569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
                       912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
        self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if (cls['id'] in seen_cats) and (cls['id'] not in distractors)]
        cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}

        if self.config['CLASSES_TO_EVAL']:
            self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                               for cls in self.config['CLASSES_TO_EVAL']]
            if not all(self.class_list):
                raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
                                         ', '.join(self.valid_classes) +
                                         ' are valid (classes present in ground truth data).')
        else:
            self.class_list = [cls for cls in self.valid_classes]
        self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        self.tracker_data = {tracker: dict() for tracker in self.tracker_list}

        for tracker in self.tracker_list:
            tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
                            if file.endswith('.json')]
            if len(tr_dir_files) != 1:
                raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
                                         + ' does not contain exactly one json file.')
            with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
                curr_data = self._postproc_prediction_data(json.load(f))

            # limit detections if MAX_DETECTIONS > 0
            if self.config['MAX_DETECTIONS']:
                curr_data = self._limit_dets_per_image(curr_data)

            # fill missing video ids
            self._fill_video_ids_inplace(curr_data)

            # make track ids unique over the whole evaluation set
            self._make_track_ids_unique(curr_data)

            # merge categories marked with a merged tag in TAO dataset
            self._merge_categories(curr_data)

            # get tracker sequence information
            curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
            self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
            self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the TAO format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes]: list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
                                as keys and lists (for each track) as values

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        [classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
                                as keys and lists as values
        [classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
        """
        seq_id = self.seq_name_to_seq_id[seq]
        # File location
        if is_gt:
            imgs = self.videos_to_gt_images[seq_id]
        else:
            imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq_id]
        img_to_timestep = self.seq_to_images_to_timestep[seq_id]
        data_keys = ['ids', 'classes', 'dets']
        if not is_gt:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}
        for img in imgs:
            # some tracker data contains images without any ground truth information; these are ignored
            try:
                t = img_to_timestep[img['id']]
            except KeyError:
                continue
            annotations = img['annotations']
            raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
            raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
            raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
            if not is_gt:
                raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)

        for t, d in enumerate(raw_data['dets']):
            if d is None:
                raw_data['dets'][t] = np.empty((0, 4)).astype(float)
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if not is_gt:
                    raw_data['tracker_confidences'][t] = np.empty(0)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
        if is_gt:
            classes_to_consider = all_classes
            all_tracks = self.videos_to_gt_tracks[seq_id]
        else:
            classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
                                  + self.seq_to_classes[seq_id]['neg_cat_ids']
            all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]

        classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
                             if cls in classes_to_consider else [] for cls in all_classes}

        # mapping from classes to track information
        raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
                                                for det in track['annotations']} for track in tracks]
                                         for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
                                            for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
                                              for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
                                                for cls, tracks in classes_to_tracks.items()}

        if not is_gt:
            raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
                                                                              for x in track['annotations']])
                                                                     for track in tracks])
                                                      for cls, tracks in classes_to_tracks.items()}

        if is_gt:
            key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
                       'classes_to_track_ids': 'classes_to_gt_track_ids',
                       'classes_to_track_lengths': 'classes_to_gt_track_lengths',
                       'classes_to_track_areas': 'classes_to_gt_track_areas'}
        else:
            key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
                       'classes_to_track_ids': 'classes_to_dt_track_ids',
                       'classes_to_track_lengths': 'classes_to_dt_track_lengths',
                       'classes_to_track_areas': 'classes_to_dt_track_areas'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        raw_data['num_timesteps'] = num_timesteps
        raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
        raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets]: integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous, and checks that ids are
                unique within each timestep.
        TAO:
            In TAO, the 4 preproc steps are as follows:
                1) All classes present in the ground truth data are evaluated separately.
                2) No matched tracker detections are removed.
                3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
                    belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
                    detections for classes which are marked as not exhaustively labeled are removed.
                4) No gt detections are removed.
            Further, for TrackMAP computation, track representations for the given class are accessed from a dictionary
                and the tracks from the tracker data are sorted according to the tracker confidence.
        """
        cls_id = self.class_name_to_class_id[cls]
        is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
        is_neg_category = cls_id in raw_data['neg_cat_ids']
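        # Illustrative example of the removal rule below (values are made up): if a timestep has no
        # gt dets for this class, the class is not in the sequence's neg_category_ids, and the
        # tracker reports 3 (necessarily unmatched) dets, then all 3 are removed. If the class were
        # a negative category the unmatched dets would be kept, unless it is not exhaustively labeled.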

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0

        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = raw_data['gt_dets'][t][gt_class_mask]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
            tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            if not self.config['EXEMPLAR_GUIDED']:
                # Match tracker and gt dets (with the Hungarian algorithm).
                unmatched_indices = np.arange(tracker_ids.shape[0])
                if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                    matching_scores = similarity_scores.copy()
                    matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                    match_rows, match_cols = linear_sum_assignment(-matching_scores)
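                    # linear_sum_assignment minimises total cost, so the similarity matrix is
                    # negated to obtain a maximum-similarity (IoU) assignment; pairs whose score
                    # was zeroed by the 0.5 threshold above are discarded again just below.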
                    actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                    match_cols = match_cols[actually_matched_mask]
                    unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

                if gt_ids.shape[0] == 0 and not is_neg_category:
                    to_remove_tracker = unmatched_indices
                elif is_not_exhaustively_labeled:
                    to_remove_tracker = unmatched_indices
                else:
                    to_remove_tracker = np.array([], dtype=int)

                # remove all unwanted unmatched tracker detections
                data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
                data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
                data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
                similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
            else:
                data['tracker_ids'][t] = tracker_ids
                data['tracker_dets'][t] = tracker_dets
                data['tracker_confidences'][t] = tracker_confidences

            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
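            # e.g. (illustrative): original gt ids {3, 7, 12} become contiguous ids {0, 1, 2}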
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # get track representations
        data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
        data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
        data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
        data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
        data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
        data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
        data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
        data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
        data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
        data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
        data['iou_type'] = self._iou_type()

        # sort tracker data tracks by tracker confidence scores
        if data['dt_tracks']:
            idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
            data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
            data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
            data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
            data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
            data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
        return similarity_scores

    def _merge_categories(self, annotations):
        """
        Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
        :param annotations: the annotations in which the classes should be merged
        :return: None
        """
        merge_map = {}
        for category in self.gt_data['categories']:
            if 'merged' in category:
                for to_merge in category['merged']:
                    merge_map[to_merge['id']] = category['id']

        for ann in annotations:
            ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])

    def _compute_vid_mappings(self, annotations):
        """
        Computes mappings from videos to corresponding tracks and images.
        :param annotations: the annotations for which the mapping should be generated
        :return: the video-to-track mapping, the video-to-image mapping
        """
        vids_to_tracks = {}
        vids_to_imgs = {}
        vid_ids = [vid['id'] for vid in self.gt_data['videos']]

        # compute a mapping from image IDs to images
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        for ann in annotations:
            ann["area"] = self._calculate_area_for_ann(ann)

            vid = ann["video_id"]
            if ann["video_id"] not in vids_to_tracks.keys():
                vids_to_tracks[ann["video_id"]] = list()
            if ann["video_id"] not in vids_to_imgs.keys():
                vids_to_imgs[ann["video_id"]] = list()

            # Fill in vids_to_tracks
            tid = ann["track_id"]
            exist_tids = [track["id"] for track in vids_to_tracks[vid]]
            try:
                index1 = exist_tids.index(tid)
            except ValueError:
                index1 = -1
            if tid not in exist_tids:
                curr_track = {"id": tid, "category_id": ann["category_id"],
                              "video_id": vid, "annotations": [ann]}
                vids_to_tracks[vid].append(curr_track)
            else:
                vids_to_tracks[vid][index1]["annotations"].append(ann)

            # Fill in vids_to_imgs
            img_id = ann['image_id']
            exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
            try:
                index2 = exist_img_ids.index(img_id)
            except ValueError:
                index2 = -1
            if index2 == -1:
                curr_img = {"id": img_id, "annotations": [ann]}
                vids_to_imgs[vid].append(curr_img)
            else:
                vids_to_imgs[vid][index2]["annotations"].append(ann)

        # sort annotations by frame index and compute track area
        for vid, tracks in vids_to_tracks.items():
            for track in tracks:
                track["annotations"] = sorted(
                    track['annotations'],
                    key=lambda x: images[x['image_id']]['frame_index'])
                # Compute the average area
                track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))

        # Ensure all videos are present
        for vid_id in vid_ids:
            if vid_id not in vids_to_tracks.keys():
                vids_to_tracks[vid_id] = []
            if vid_id not in vids_to_imgs.keys():
                vids_to_imgs[vid_id] = []

        return vids_to_tracks, vids_to_imgs

    def _compute_image_to_timestep_mappings(self):
        """
        Computes a mapping from images to the corresponding timestep in the sequence.
        :return: the image-to-timestep mapping
        """
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
        for vid in seq_to_imgs_to_timestep:
            curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
            curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
            seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}

        return seq_to_imgs_to_timestep

    def _limit_dets_per_image(self, annotations):
        """
        Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
        https://github.com/TAO-Dataset/
        :param annotations: the annotations in which the detections should be limited
        :return: the annotations with limited detections
        """
        max_dets = self.config['MAX_DETECTIONS']
        img_ann = defaultdict(list)
        for ann in annotations:
            img_ann[ann["image_id"]].append(ann)

        for img_id, _anns in img_ann.items():
            if len(_anns) <= max_dets:
                continue
            _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
            img_ann[img_id] = _anns[:max_dets]

        return [ann for anns in img_ann.values() for ann in anns]

    def _fill_video_ids_inplace(self, annotations):
        """
        Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotations for which the video IDs should be filled inplace
        :return: None
        """
        missing_video_id = [x for x in annotations if 'video_id' not in x]
        if missing_video_id:
            image_id_to_video_id = {
                x['id']: x['video_id'] for x in self.gt_data['images']
            }
            for x in missing_video_id:
                x['video_id'] = image_id_to_video_id[x['image_id']]

    @staticmethod
    def _make_track_ids_unique(annotations):
        """
        Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotation set
        :return: the number of updated IDs
        """
        track_id_videos = {}
        track_ids_to_update = set()
        max_track_id = 0
        for ann in annotations:
            t = ann['track_id']
            if t not in track_id_videos:
                track_id_videos[t] = ann['video_id']

            if ann['video_id'] != track_id_videos[t]:
                # Track id is assigned to multiple videos
                track_ids_to_update.add(t)
            max_track_id = max(max_track_id, t)

        if track_ids_to_update:
            next_id = itertools.count(max_track_id + 1)
            new_track_ids = defaultdict(lambda: next(next_id))
            for ann in annotations:
                t = ann['track_id']
                v = ann['video_id']
                if t in track_ids_to_update:
                    ann['track_id'] = new_track_ids[t, v]
        return len(track_ids_to_update)
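    # How the uniquification above behaves (illustrative values): if track id 5 appears in both
    # video 1 and video 2 and the maximum id seen is 9, the defaultdict hands out a fresh id on
    # first access per (track, video) pair, e.g. (5, 1) -> 10 and (5, 2) -> 11, so every
    # (track_id, video_id) combination ends up with a globally unique track id.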
@@ -0,0 +1,675 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from trackeval.utils import TrackEvalException
from trackeval.datasets._base_dataset import _BaseDataset
from trackeval import utils
from trackeval import _timing


class BURST_OW_Base(_BaseDataset):
    """Dataset class for open-world BURST tracking (annotations stored in the TAO format)"""

    def _postproc_ground_truth_data(self, data):
        return data

    def _postproc_prediction_data(self, data):
        return data

    def _iou_type(self):
        return 'bbox'

    def _box_or_mask_from_det(self, det):
        return np.atleast_1d(det['bbox'])

    def _calculate_area_for_ann(self, ann):
        return ann["bbox"][2] * ann["bbox"][3]

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': None,  # Classes to eval (if None, all classes)
            'SPLIT_TO_EVAL': 'training',  # Valid: 'training', 'val'
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'MAX_DETECTIONS': 300,  # Number of maximal allowed detections per image (0 for unlimited)
            'SUBSET': 'all'
        }
        return default_config
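    # SUBSET selects which part of the open-world taxonomy is evaluated (see
    # _split_known_unknown_distractor below): e.g. {'SUBSET': 'known'} keeps only the
    # 78 COCO-overlapping categories, 'unknown' keeps everything that is neither known
    # nor a distractor, and 'all' skips the filtering entirely.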

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = True
        self.use_super_categories = False

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
        if len(gt_dir_files) != 1:
            raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')

        with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
            self.gt_data = self._postproc_ground_truth_data(json.load(f))

        self.subset = self.config['SUBSET']
        if self.subset != 'all':
            # Split GT data into `known`, `unknown` or `distractor`
            self._split_known_unknown_distractor()
            self.gt_data = self._filter_gt_data(self.gt_data)

        # merge categories marked with a merged tag in TAO dataset
        self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])

        # Get sequences to eval and sequence information
        self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
        self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
        # compute mappings from videos to annotation data
        self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
        # compute sequence lengths
        self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
        for img in self.gt_data['images']:
            self.seq_lengths[img['video_id']] += 1
        self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
        self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
                                                                in self.videos_to_gt_tracks[vid['id']]}),
                                           'neg_cat_ids': vid['neg_category_ids'],
                                           'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
                               for vid in self.gt_data['videos']}

        # Get classes to eval
        considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
        seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
                         in self.seq_to_classes[vid_id]['pos_cat_ids']])
        # only classes with ground truth are evaluated in TAO
        self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
        # cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}

        if self.config['CLASSES_TO_EVAL']:
            # self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
            #                    for cls in self.config['CLASSES_TO_EVAL']]
            self.class_list = ["object"]  # class-agnostic
            if not all(self.class_list):
                raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
                                         ', '.join(self.valid_classes) +
                                         ' are valid (classes present in ground truth data).')
        else:
            # self.class_list = [cls for cls in self.valid_classes]
            self.class_list = ["object"]  # class-agnostic
        # self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
        self.class_name_to_class_id = {"object": 1}  # class-agnostic
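        # In the open-world setting the evaluation is class-agnostic: every gt and tracker
        # detection is mapped to the single pseudo-class "object" (id 1), so the commented-out
        # per-class logic from the closed-world variant is intentionally bypassed.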

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        self.tracker_data = {tracker: dict() for tracker in self.tracker_list}

        for tracker in self.tracker_list:
            tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
                            if file.endswith('.json')]
            if len(tr_dir_files) != 1:
                raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
                                         + ' does not contain exactly one json file.')
            with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
                curr_data = self._postproc_prediction_data(json.load(f))

            # limit detections if MAX_DETECTIONS > 0
            if self.config['MAX_DETECTIONS']:
                curr_data = self._limit_dets_per_image(curr_data)

            # fill missing video ids
            self._fill_video_ids_inplace(curr_data)

            # make track ids unique over the whole evaluation set
            self._make_track_ids_unique(curr_data)

            # merge categories marked with a merged tag in TAO dataset
            self._merge_categories(curr_data)

            # get tracker sequence information
            curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
            self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
            self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the TAO format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes]: list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
                                as keys and lists (for each track) as values

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        [classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
                                as keys and lists as values
        [classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
        """
        seq_id = self.seq_name_to_seq_id[seq]
        # File location
        if is_gt:
            imgs = self.videos_to_gt_images[seq_id]
        else:
            imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq_id]
        img_to_timestep = self.seq_to_images_to_timestep[seq_id]
        data_keys = ['ids', 'classes', 'dets']
        if not is_gt:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}
        for img in imgs:
            # some tracker data contains images without any ground truth information; these are ignored
            try:
                t = img_to_timestep[img['id']]
            except KeyError:
                continue
            annotations = img['annotations']
            raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
            raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
            raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int)  # class-agnostic
            if not is_gt:
                raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)

        for t, d in enumerate(raw_data['dets']):
            if d is None:
                raw_data['dets'][t] = np.empty((0, 4)).astype(float)
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if not is_gt:
                    raw_data['tracker_confidences'][t] = np.empty(0)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        # all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
        all_classes = [1]  # class-agnostic

        if is_gt:
            classes_to_consider = all_classes
            all_tracks = self.videos_to_gt_tracks[seq_id]
        else:
            # classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
            #                       + self.seq_to_classes[seq_id]['neg_cat_ids']
            classes_to_consider = all_classes  # class-agnostic
            all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]

        # classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
        #                      if cls in classes_to_consider else [] for cls in all_classes}
        classes_to_tracks = {cls: [track for track in all_tracks]
                             if cls in classes_to_consider else [] for cls in all_classes}  # class-agnostic

        # mapping from classes to track information
        raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
                                                for det in track['annotations']} for track in tracks]
                                         for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
                                            for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
                                              for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
                                                for cls, tracks in classes_to_tracks.items()}

        if not is_gt:
            raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
                                                                              for x in track['annotations']])
                                                                     for track in tracks])
                                                      for cls, tracks in classes_to_tracks.items()}

        if is_gt:
            key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
                       'classes_to_track_ids': 'classes_to_gt_track_ids',
                       'classes_to_track_lengths': 'classes_to_gt_track_lengths',
                       'classes_to_track_areas': 'classes_to_gt_track_areas'}
        else:
            key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
                       'classes_to_track_ids': 'classes_to_dt_track_ids',
                       'classes_to_track_lengths': 'classes_to_dt_track_lengths',
                       'classes_to_track_areas': 'classes_to_dt_track_areas'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        raw_data['num_timesteps'] = num_timesteps
        raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
        raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets]: integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous, and checks that ids are
                unique within each timestep.
        TAO:
            In TAO, the 4 preproc steps are as follows:
                1) All classes present in the ground truth data are evaluated separately.
                2) No matched tracker detections are removed.
                3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
                    belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
                    detections for classes which are marked as not exhaustively labeled are removed.
                4) No gt detections are removed.
            Further, for TrackMAP computation, track representations for the given class are accessed from a dictionary
                and the tracks from the tracker data are sorted according to the tracker confidence.
        """
        cls_id = self.class_name_to_class_id[cls]
        is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
        is_neg_category = cls_id in raw_data['neg_cat_ids']

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = raw_data['gt_dets'][t][gt_class_mask]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
            tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with the Hungarian algorithm).
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_cols = match_cols[actually_matched_mask]
                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            if gt_ids.shape[0] == 0 and not is_neg_category:
                to_remove_tracker = unmatched_indices
            elif is_not_exhaustively_labeled:
                to_remove_tracker = unmatched_indices
            else:
                to_remove_tracker = np.array([], dtype=int)

            # remove all unwanted unmatched tracker detections
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # get track representations
        data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
        data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
        data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
        data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
        data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
        data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
        data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
        data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
        data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
        data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
        data['iou_type'] = self._iou_type()

        # sort tracker data tracks by tracker confidence scores
        if data['dt_tracks']:
            idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
            data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
            data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
            data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
            data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
            data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
        return similarity_scores

    def _merge_categories(self, annotations):
        """
        Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
        :param annotations: the annotations in which the classes should be merged
        :return: None
        """
        merge_map = {}
        for category in self.gt_data['categories']:
            if 'merged' in category:
                for to_merge in category['merged']:
                    merge_map[to_merge['id']] = category['id']

        for ann in annotations:
            ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])

    def _compute_vid_mappings(self, annotations):
        """
        Computes mappings from videos to corresponding tracks and images.
        :param annotations: the annotations for which the mapping should be generated
        :return: the video-to-track mapping, the video-to-image mapping
        """
        vids_to_tracks = {}
        vids_to_imgs = {}
        vid_ids = [vid['id'] for vid in self.gt_data['videos']]

        # compute a mapping from image IDs to images
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        for ann in annotations:
            ann["area"] = self._calculate_area_for_ann(ann)

            vid = ann["video_id"]
            if ann["video_id"] not in vids_to_tracks.keys():
                vids_to_tracks[ann["video_id"]] = list()
            if ann["video_id"] not in vids_to_imgs.keys():
                vids_to_imgs[ann["video_id"]] = list()

            # Fill in vids_to_tracks
            tid = ann["track_id"]
            exist_tids = [track["id"] for track in vids_to_tracks[vid]]
            try:
                index1 = exist_tids.index(tid)
            except ValueError:
                index1 = -1
            if tid not in exist_tids:
                curr_track = {"id": tid, "category_id": ann["category_id"],
                              "video_id": vid, "annotations": [ann]}
                vids_to_tracks[vid].append(curr_track)
            else:
                vids_to_tracks[vid][index1]["annotations"].append(ann)

            # Fill in vids_to_imgs
            img_id = ann['image_id']
            exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
            try:
                index2 = exist_img_ids.index(img_id)
            except ValueError:
                index2 = -1
            if index2 == -1:
                curr_img = {"id": img_id, "annotations": [ann]}
                vids_to_imgs[vid].append(curr_img)
            else:
                vids_to_imgs[vid][index2]["annotations"].append(ann)

        # sort annotations by frame index and compute track area
        for vid, tracks in vids_to_tracks.items():
            for track in tracks:
                track["annotations"] = sorted(
                    track['annotations'],
                    key=lambda x: images[x['image_id']]['frame_index'])
                # Compute the average area
                track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))

        # Ensure all videos are present
        for vid_id in vid_ids:
            if vid_id not in vids_to_tracks.keys():
                vids_to_tracks[vid_id] = []
            if vid_id not in vids_to_imgs.keys():
                vids_to_imgs[vid_id] = []

        return vids_to_tracks, vids_to_imgs

    def _compute_image_to_timestep_mappings(self):
        """
        Computes a mapping from images to the corresponding timestep in the sequence.
        :return: the image-to-timestep mapping
        """
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
        for vid in seq_to_imgs_to_timestep:
            curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
            curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
            seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}

        return seq_to_imgs_to_timestep

    def _limit_dets_per_image(self, annotations):
        """
        Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
        https://github.com/TAO-Dataset/
        :param annotations: the annotations in which the detections should be limited
        :return: the annotations with limited detections
        """
        max_dets = self.config['MAX_DETECTIONS']
        img_ann = defaultdict(list)
        for ann in annotations:
            img_ann[ann["image_id"]].append(ann)

        for img_id, _anns in img_ann.items():
            if len(_anns) <= max_dets:
                continue
            _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
            img_ann[img_id] = _anns[:max_dets]

        return [ann for anns in img_ann.values() for ann in anns]

    def _fill_video_ids_inplace(self, annotations):
        """
        Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotations for which the video IDs should be filled inplace
        :return: None
        """
        missing_video_id = [x for x in annotations if 'video_id' not in x]
        if missing_video_id:
            image_id_to_video_id = {
                x['id']: x['video_id'] for x in self.gt_data['images']
            }
            for x in missing_video_id:
                x['video_id'] = image_id_to_video_id[x['image_id']]

    @staticmethod
    def _make_track_ids_unique(annotations):
        """
        Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotation set
        :return: the number of updated IDs
        """
        track_id_videos = {}
        track_ids_to_update = set()
        max_track_id = 0
        for ann in annotations:
            t = ann['track_id']
            if t not in track_id_videos:
                track_id_videos[t] = ann['video_id']

            if ann['video_id'] != track_id_videos[t]:
                # Track id is assigned to multiple videos
                track_ids_to_update.add(t)
            max_track_id = max(max_track_id, t)

        if track_ids_to_update:
            next_id = itertools.count(max_track_id + 1)
            new_track_ids = defaultdict(lambda: next(next_id))
            for ann in annotations:
                t = ann['track_id']
                v = ann['video_id']
                if t in track_ids_to_update:
                    ann['track_id'] = new_track_ids[t, v]
        return len(track_ids_to_update)

    def _split_known_unknown_distractor(self):
        all_ids = set([i for i in range(1, 2000)])  # 2000 is larger than the max category id in TAO-OW.
        # `knowns` includes 78 TAO category ids that correspond to 78 COCO classes.
        # (The other 2 COCO classes do not have corresponding classes in TAO).
        self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
                       1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
                       699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
                       371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
        # `distractors` is defined as in the paper "Opening up Open-World Tracking"
        self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
                            569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
                            912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
        self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
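        # Illustrative effect: with category ids 1..1999, `knowns` has 78 entries and
        # `distractors` 45, so `unknowns` covers all remaining ids. A quick sanity check
        # (assumed, not part of the original code):
        #   assert self.knowns.isdisjoint(self.distractors)
        #   assert len(self.unknowns) == 1999 - len(self.knowns) - len(self.distractors)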

    def _filter_gt_data(self, raw_gt_data):
        """
        Filter out irrelevant data in the raw_gt_data
        Args:
            raw_gt_data: directly loaded from json.

        Returns:
            filtered gt_data
        """
        valid_cat_ids = list()
        if self.subset == "known":
            valid_cat_ids = self.knowns
        elif self.subset == "distractor":
            valid_cat_ids = self.distractors
        elif self.subset == "unknown":
            valid_cat_ids = self.unknowns
        # elif self.subset == "test_only_unknowns":
        #     valid_cat_ids = test_only_unknowns
        else:
            raise TrackEvalException('The parameter SUBSET is incorrect: ' + str(self.subset))

        filtered = dict()
        filtered["videos"] = raw_gt_data["videos"]
        # filtered["videos"] = list()
        unwanted_vid = set()
        # for video in raw_gt_data["videos"]:
        #     datasrc = video["name"].split('/')[1]
        #     if datasrc in data_srcs:
        #         filtered["videos"].append(video)
        #     else:
        #         unwanted_vid.add(video["id"])

        filtered["annotations"] = list()
        for ann in raw_gt_data["annotations"]:
            if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
                filtered["annotations"].append(ann)

        filtered["tracks"] = list()
        for track in raw_gt_data["tracks"]:
            if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
                filtered["tracks"].append(track)

        filtered["images"] = list()
        for image in raw_gt_data["images"]:
            if image["video_id"] not in unwanted_vid:
                filtered["images"].append(image)

        filtered["categories"] = list()
        for cat in raw_gt_data["categories"]:
            if cat["id"] in valid_cat_ids:
                filtered["categories"].append(cat)

        if "info" in raw_gt_data:
            filtered["info"] = raw_gt_data["info"]
        if "licenses" in raw_gt_data:
            filtered["licenses"] = raw_gt_data["licenses"]

        if "track_id_offsets" in raw_gt_data:
            filtered["track_id_offsets"] = raw_gt_data["track_id_offsets"]

        if "split" in raw_gt_data:
            filtered["split"] = raw_gt_data["split"]

        return filtered
|
||||
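The known/distractor/unknown split above is pure set arithmetic: any category id that is neither known nor a distractor counts as unknown. A minimal sketch of the same logic, with abbreviated id sets that are purely illustrative:

    all_ids = set(range(1, 2000))
    knowns = {4, 13, 34}          # abbreviated for the example
    distractors = {20, 63}
    unknowns = all_ids.difference(knowns.union(distractors))
    assert 4 not in unknowns and 20 not in unknowns and 5 in unknowns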
@@ -0,0 +1,39 @@
import json
import argparse

from .format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter


def main(args):
    with open(args.gt_input_file) as f:
        ali_format_gt = json.load(f)
    # the converters read the split from the JSON's own 'split' field
    tao_format_gt = GroundTruthBURSTFormatToTAOFormatConverter(
        ali_format_gt).convert()
    with open(args.gt_output_file, 'w') as f:
        json.dump(tao_format_gt, f)

    if args.pred_input_file is None:
        return
    with open(args.pred_input_file) as f:
        ali_format_pred = json.load(f)
    tao_format_pred = PredictionBURSTFormatToTAOFormatConverter(
        tao_format_gt, ali_format_pred,
        args.exemplar_guided).convert()
    with open(args.pred_output_file, 'w') as f:
        json.dump(tao_format_pred, f)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gt_input_file', type=str,
        default='../data/gt/tsunami/exemplar_guided/validation_all_annotations.json')
    parser.add_argument('--gt_output_file', type=str,
                        default='/tmp/val_gt.json')
    parser.add_argument('--pred_input_file', type=str,
                        default='../data/trackers/tsunami/exemplar_guided/STCN_off_the_shelf/data/results.json')
    parser.add_argument('--pred_output_file', type=str,
                        default='/tmp/pred.json')
    parser.add_argument('--split', type=str, default='validation')
    # note: argparse's type=bool treats any non-empty string as True
    parser.add_argument('--exemplar_guided', type=bool, default=True)
    args_ = parser.parse_args()
    main(args_)
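For a quick smoke test, main() can also be driven programmatically instead of via the command line. A sketch, assuming the script lives at trackeval.datasets.burst_helpers.convert_burst_format_to_tao_format and that the input path exists (both the module path and the file name are assumptions here):

    from argparse import Namespace
    from trackeval.datasets.burst_helpers import convert_burst_format_to_tao_format as conv

    args = Namespace(
        gt_input_file='val_annotations.json',  # hypothetical input path
        gt_output_file='/tmp/val_gt.json',
        pred_input_file=None,                  # skip the prediction conversion
        pred_output_file=None,
        split='validation',
        exemplar_guided=False,
    )
    conv.main(args)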
@@ -0,0 +1,259 @@
import os
import json
import pycocotools.mask as cocomask
from tabulate import tabulate
from typing import Union


def _global_track_id(*, local_track_id: Union[str, int],
                     video_id: Union[str, int],
                     track_id_mapping) -> int:
    # remap local track ids into globally unique ids
    return track_id_mapping[str(video_id)][str(local_track_id)]


class GroundTruthBURSTFormatToTAOFormatConverter:
    def __init__(self, ali_format):
        self._ali_format = ali_format
        self._split = ali_format['split']
        self._categories = self._make_categories()
        self._videos = []
        self._annotations = []
        self._tracks = {}
        self._images = []
        self._next_img_id = 0
        self._next_ann_id = 0

        self._track_id_mapping = self._load_track_id_mapping()

        for seq in ali_format['sequences']:
            self._visit_seq(seq)

    def _load_track_id_mapping(self):
        id_map = {}
        next_global_track_id = 1
        for seq in self._ali_format['sequences']:
            seq_id = seq['id']
            seq_id_map = {}
            id_map[str(seq_id)] = seq_id_map
            for local_track_id in seq['track_category_ids']:
                seq_id_map[str(local_track_id)] = next_global_track_id
                next_global_track_id += 1
        return id_map

    def global_track_id(self, *, local_track_id: Union[str, int],
                        video_id: Union[str, int]) -> int:
        return _global_track_id(local_track_id=local_track_id,
                                video_id=video_id,
                                track_id_mapping=self._track_id_mapping)

    def _visit_seq(self, seq):
        self._make_video(seq)
        imgs = self._make_images(seq)
        self._make_annotations_and_tracks(seq, imgs)

    def _make_images(self, seq):
        imgs = []
        for img_path in seq['annotated_image_paths']:
            video = self._split + '/' + seq['dataset'] + '/' + seq['seq_name']
            file_name = video + '/' + img_path

            # TODO: once python 3.9 is more common, we can use this nicer and safer code
            # stripped = img_path.removesuffix('.jpg').removesuffix('.png').removeprefix('frame')
            stripped = img_path.replace('.jpg', '').replace('.png', '').replace('frame', '')

            last = stripped.split('_')[-1]
            frame_idx = int(last)

            img = {'id': self._next_img_id, 'video': video,
                   'width': seq['width'], 'height': seq['height'],
                   'file_name': file_name,
                   'frame_index': frame_idx,
                   'video_id': seq['id']}
            self._next_img_id += 1
            self._images.append(img)
            imgs.append(img)
        return imgs

    def _make_video(self, seq):
        video_id = seq['id']
        dataset = seq['dataset']
        seq_name = seq['seq_name']
        name = f'{self._split}/' + dataset + '/' + seq_name
        video = {
            'id': video_id, 'width': seq['width'], 'height': seq['height'],
            'neg_category_ids': seq['neg_category_ids'],
            'not_exhaustive_category_ids': seq['not_exhaustive_category_ids'],
            'name': name, 'metadata': {'dataset': dataset}}
        self._videos.append(video)

    def _make_annotations_and_tracks(self, seq, imgs):
        video_id = seq['id']
        segs = seq['segmentations']
        assert len(segs) == len(imgs), (len(segs), len(imgs))
        # distractor category ids as defined in "Opening up Open-World Tracking"
        # (hoisted out of the inner loop; the set is constant)
        distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
                       569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
                       912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
        for frame_segs, img in zip(segs, imgs):
            for local_track_id, seg in frame_segs.items():
                global_track_id = self.global_track_id(
                    local_track_id=local_track_id, video_id=seq['id'])
                rle = seg['rle']
                segmentation = {'counts': rle,
                                'size': [img['height'], img['width']]}
                image_id = img['id']
                category_id = int(seq['track_category_ids'][local_track_id])
                if category_id in distractors:
                    continue
                coco_bbox = cocomask.toBbox(segmentation)
                bbox = [int(x) for x in coco_bbox]
                ann = {'segmentation': segmentation, 'id': self._next_ann_id,
                       'image_id': image_id, 'category_id': category_id,
                       'track_id': global_track_id, 'video_id': video_id,
                       'bbox': bbox}
                self._next_ann_id += 1
                self._annotations.append(ann)

                if global_track_id not in self._tracks:
                    track = {'id': global_track_id, 'category_id': category_id,
                             'video_id': video_id}
                    self._tracks[global_track_id] = track

    def convert(self):
        tracks = sorted(self._tracks.values(), key=lambda t: t['id'])
        return {'videos': self._videos, 'annotations': self._annotations,
                'tracks': tracks, 'images': self._images,
                'categories': self._categories,
                'track_id_mapping': self._track_id_mapping,
                'split': self._split}

    def _make_categories(self):
        tao_categories_path = os.path.join(os.path.dirname(__file__), 'tao_categories.json')
        with open(tao_categories_path) as f:
            return json.load(f)


class PredictionBURSTFormatToTAOFormatConverter:
    def __init__(self, gt, ali_format, exemplar_guided):
        self._gt = gt
        self._ali_format = ali_format
        if 'split' in ali_format:
            self._split = ali_format['split']
            gt_split = self._gt['split']
            assert self._split == gt_split, (self._split, gt_split)
        else:
            self._split = self._gt['split']
        self._exemplar_guided = exemplar_guided
        self._result = []
        self._next_det_id = 0

        self._img_by_filename = {}
        for img in self._gt['images']:
            file_name = img['file_name']
            assert file_name not in self._img_by_filename
            self._img_by_filename[file_name] = img

        self._gt_track_by_track_id = {}
        for track in self._gt['tracks']:
            self._gt_track_by_track_id[int(track['id'])] = track

        self._filtered_out_track_ids = set()

        for seq in ali_format['sequences']:
            self._visit_seq(seq)

        if exemplar_guided and len(self._filtered_out_track_ids) > 0:
            self.print_filter_out_debug_info(ali_format)

    def print_filter_out_debug_info(self, ali_format):
        track_ids_in_pred = set()
        a_dict_for_debugging = {}
        for seq in ali_format['sequences']:
            for local_track_id in seq['track_category_ids']:
                global_track_id = _global_track_id(
                    local_track_id=local_track_id, video_id=seq['id'],
                    track_id_mapping=self._gt['track_id_mapping'])
                track_ids_in_pred.add(global_track_id)
                a_dict_for_debugging[global_track_id] = {'seq': seq,
                                                         'local_track_id': local_track_id}
        print('Number of track ids in pred:', len(track_ids_in_pred))
        print('Exemplar guided: filtered out',
              len(self._filtered_out_track_ids),
              'tracks which were not found in the ground truth.')
        track_ids_after_filtering = set(d['track_id'] for d in self._result)
        print('Number of tracks after filtering:',
              len(track_ids_after_filtering))
        problem_tracks = list(
            track_ids_in_pred - track_ids_after_filtering - self._filtered_out_track_ids)
        if len(problem_tracks) > 0:
            print("\nWARNING:", len(problem_tracks),
                  "object tracks are not present. There could be a number of reasons for this:\n"
                  "(1) If you are running evaluation for the box/point exemplar-guided task, then this is to be"
                  " expected because your tracker probably didn't predict masks for every ground-truth object"
                  " instance.\n"
                  "(2) If you are running evaluation for the mask exemplar-guided task, then this could indicate a "
                  "problem. Assuming that you copied the given first-frame object mask to your predicted result, this "
                  "should not happen. It could be that your predictions are at the wrong frame-rate, i.e. you have no "
                  "predicted masks for the video frames which will be evaluated.\n")

            rows = []
            for xx in problem_tracks:
                rows.append([a_dict_for_debugging[xx]['seq']['dataset'],
                             a_dict_for_debugging[xx]['seq']['seq_name'],
                             a_dict_for_debugging[xx]['local_track_id']])

            print("For your reference, the sequence names and track ids of these missing tracks are:")
            print(tabulate(rows, ["Dataset", "Sequence Name", "Track ID"]))

    def _visit_seq(self, seq):
        dataset = seq['dataset']
        seq_name = seq['seq_name']
        assert len(seq['segmentations']) == len(seq['annotated_image_paths'])
        for frame_segs, img_path in zip(seq['segmentations'],
                                        seq['annotated_image_paths']):
            for local_track_id_str, track_det in frame_segs.items():
                rle = track_det['rle']

                file_name = self._split + '/' + dataset + '/' + seq_name + '/' + img_path
                # the result might have a higher frame rate than the ground truth
                if file_name not in self._img_by_filename:
                    continue

                img = self._img_by_filename[file_name]
                img_id = img['id']
                height = img['height']
                width = img['width']
                segmentation = {'counts': rle, 'size': [height, width]}

                local_track_id = int(local_track_id_str)
                if self._exemplar_guided:
                    global_track_id = _global_track_id(
                        local_track_id=local_track_id, video_id=seq['id'],
                        track_id_mapping=self._gt['track_id_mapping'])
                else:
                    global_track_id = local_track_id
                coco_bbox = cocomask.toBbox(segmentation)
                bbox = [int(x) for x in coco_bbox]
                det = {'id': self._next_det_id, 'image_id': img_id,
                       'track_id': global_track_id, 'bbox': bbox,
                       'segmentation': segmentation}
                if self._exemplar_guided:
                    if global_track_id not in self._gt_track_by_track_id:
                        self._filtered_out_track_ids.add(global_track_id)
                        continue
                    gt_track = self._gt_track_by_track_id[global_track_id]
                    category_id = gt_track['category_id']
                    det['category_id'] = category_id
                elif 'category_id' in track_det:
                    det['category_id'] = track_det['category_id']
                else:
                    category_id = seq['track_category_ids'][local_track_id_str]
                    det['category_id'] = category_id
                self._next_det_id += 1
                if 'score' in track_det:
                    det['score'] = track_det['score']
                else:
                    det['score'] = 1.0
                self._result.append(det)

    def convert(self):
        return self._result
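The id remapping both converters rely on is a two-level dict keyed by stringified video and local track ids. A toy illustration of that lookup (the ids are made up for the example):

    track_id_mapping = {'7': {'1': 101, '2': 102}}  # video 7 has local track ids 1 and 2

    def global_id(local_track_id, video_id):
        return track_id_mapping[str(video_id)][str(local_track_id)]

    assert global_id(2, 7) == 102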
File diff suppressed because one or more lines are too long
91
test/yolov7-tracker/tracker/trackeval/datasets/burst_ow.py
Normal file
@@ -0,0 +1,91 @@
import json
import os

from .burst_helpers.burst_ow_base import BURST_OW_Base
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils


class BURST_OW(BURST_OW_Base):
    """Dataset class for TAO tracking"""

    @staticmethod
    def get_default_dataset_config():
        tao_config = BURST_OW_Base.get_default_dataset_config()
        code_path = utils.get_code_path()
        tao_config['GT_FOLDER'] = os.path.join(
            code_path, 'data/gt/burst/all_classes/val/')  # Location of GT data
        tao_config['TRACKERS_FOLDER'] = os.path.join(
            code_path, 'data/trackers/burst/open-world/val/')  # Trackers location
        return tao_config

    def _iou_type(self):
        return 'mask'

    def _box_or_mask_from_det(self, det):
        if "segmentation" in det:
            return det["segmentation"]
        else:
            return det["mask"]

    def _calculate_area_for_ann(self, ann):
        import pycocotools.mask as cocomask
        seg = self._box_or_mask_from_det(ann)
        return cocomask.area(seg)

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores

    def _postproc_ground_truth_data(self, data):
        return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()

    def _postproc_prediction_data(self, data):
        # if it's a list, it's already in TAO format and not in Ali format;
        # however the image ids do not match and need to be remapped
        if isinstance(data, list):
            _remap_image_ids(data, self.gt_data)
            return data

        return PredictionBURSTFormatToTAOFormatConverter(
            self.gt_data, data,
            exemplar_guided=False).convert()


def _remap_image_ids(pred_data, ali_gt_data):
    code_path = utils.get_code_path()
    if 'split' in ali_gt_data:
        split = ali_gt_data['split']
    else:
        split = 'val'

    if split in ('val', 'validation'):
        tao_gt_path = os.path.join(
            code_path, 'data/gt/tao/tao_validation/gt.json')
    else:
        tao_gt_path = os.path.join(
            code_path, 'data/gt/tao/tao_test/test_without_annotations.json')

    with open(tao_gt_path) as f:
        tao_gt = json.load(f)

    tao_img_by_id = {}
    for img in tao_gt['images']:
        img_id = img['id']
        tao_img_by_id[img_id] = img

    ali_img_id_by_filename = {}
    for ali_img in ali_gt_data['images']:
        ali_img_id = ali_img['id']
        file_name = ali_img['file_name'].replace("validation", "val")
        ali_img_id_by_filename[file_name] = ali_img_id

    ali_img_id_by_tao_img_id = {}
    for tao_img_id, tao_img in tao_img_by_id.items():
        file_name = tao_img['file_name']
        ali_img_id = ali_img_id_by_filename[file_name]
        ali_img_id_by_tao_img_id[tao_img_id] = ali_img_id

    for det in pred_data:
        tao_img_id = det['image_id']
        ali_img_id = ali_img_id_by_tao_img_id[tao_img_id]
        det['image_id'] = ali_img_id
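The fallback branch above joins TAO image ids to Ali image ids through the shared file_name key. A minimal sketch of that join with made-up records:

    tao_imgs = [{'id': 10, 'file_name': 'val/a/seq1/f1.jpg'}]
    ali_imgs = [{'id': 3, 'file_name': 'validation/a/seq1/f1.jpg'}]

    ali_by_name = {im['file_name'].replace('validation', 'val'): im['id']
                   for im in ali_imgs}
    tao_to_ali = {im['id']: ali_by_name[im['file_name']] for im in tao_imgs}
    assert tao_to_ali == {10: 3}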
276
test/yolov7-tracker/tracker/trackeval/datasets/davis.py
Normal file
@@ -0,0 +1,276 @@
import os
import csv
import numpy as np
from ._base_dataset import _BaseDataset
from ..utils import TrackEvalException
from .. import utils
from .. import _timing


class DAVIS(_BaseDataset):
    """Dataset class for DAVIS tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/davis/davis_unsupervised_val/'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/davis/davis_unsupervised_val/'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'SPLIT_TO_EVAL': 'val',  # Valid: 'val', 'train'
            'CLASSES_TO_EVAL': ['general'],
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FILE': None,  # Specify seqmap file
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            # '{gt_folder}/Annotations_unsupervised/480p/{seq}'
            'MAX_DETECTIONS': 0  # Maximum number of allowed detections per sequence (0 for no threshold)
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        # defining a default class since there are no classes in DAVIS
        self.should_classes_combine = False
        self.use_super_categories = False

        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']

        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.config['TRACKERS_FOLDER']

        self.max_det = self.config['MAX_DETECTIONS']

        # Get classes to eval
        self.valid_classes = ['general']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only general class is valid.')

        # Get sequences to eval
        if self.config["SEQ_INFO"]:
            self.seq_list = list(self.config["SEQ_INFO"].keys())
            self.seq_lengths = self.config["SEQ_INFO"]
        elif self.config["SEQMAP_FILE"]:
            self.seq_list = []
            seqmap_file = self.config["SEQMAP_FILE"]
            if not os.path.isfile(seqmap_file):
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                reader = csv.reader(fp)
                for i, row in enumerate(reader):
                    if row[0] == '':
                        continue
                    seq = row[0]
                    self.seq_list.append(seq)
        else:
            self.seq_list = os.listdir(self.gt_fol)

        self.seq_lengths = {seq: len(os.listdir(os.path.join(self.gt_fol, seq))) for seq in self.seq_list}

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']
        for tracker in self.tracker_list:
            for seq in self.seq_list:
                curr_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)
                if not os.path.isdir(curr_dir):
                    print('Tracker directory not found: ' + curr_dir)
                    raise TrackEvalException('Tracker directory not found: ' +
                                             os.path.join(tracker, self.tracker_sub_fol, seq))
                tr_timesteps = len(os.listdir(curr_dir))
                if self.seq_lengths[seq] != tr_timesteps:
                    raise TrackEvalException('GT folder and tracker folder have a different number of '
                                             'timesteps for tracker %s and sequence %s' % (tracker, seq))

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the DAVIS format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [masks_void]: list of masks with void pixels (pixels to be ignored during evaluation)

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils
        from PIL import Image

        # File location
        if is_gt:
            seq_dir = os.path.join(self.gt_fol, seq)
        else:
            seq_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)

        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'dets', 'masks_void']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # read frames
        frames = [os.path.join(seq_dir, im_name) for im_name in sorted(os.listdir(seq_dir))]

        id_list = []
        for t in range(num_timesteps):
            frame = np.array(Image.open(frames[t]))
            if is_gt:
                void = frame == 255
                frame[void] = 0
                raw_data['masks_void'][t] = mask_utils.encode(np.asfortranarray(void.astype(np.uint8)))
            id_values = np.unique(frame)
            id_values = id_values[id_values != 0]
            id_list += list(id_values)
            tmp = np.ones((len(id_values), *frame.shape))
            tmp = tmp * id_values[:, None, None]
            masks = np.array(tmp == frame[None, ...]).astype(np.uint8)
            raw_data['dets'][t] = mask_utils.encode(np.array(np.transpose(masks, (1, 2, 0)), order='F'))
            raw_data['ids'][t] = id_values.astype(int)
        num_objects = len(np.unique(id_list))

        if not is_gt and num_objects > self.max_det > 0:
            raise Exception('Number of proposals (%i) for sequence %s exceeds number of maximum allowed proposals (%i).'
                            % (num_objects, seq, self.max_det))

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data["num_timesteps"] = num_timesteps
        raw_data['mask_shape'] = np.array(Image.open(frames[0])).shape
        if is_gt:
            raw_data['num_gt_ids'] = num_objects
        else:
            raw_data['num_tracker_ids'] = num_objects
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
             - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
             - cls is the class to be evaluated.
        Outputs:
             - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        DAVIS:
            In DAVIS, the 4 preproc steps are as follows:
                1) There are no classes, all detections are evaluated jointly.
                2) No matched tracker detections are removed.
                3) No unmatched tracker detections are removed.
                4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
            Preprocessing special to DAVIS: Pixels which are marked as void in the ground truth are set to zero in the
                tracker detections since they are not considered during evaluation.
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        num_gt_dets = 0
        num_tracker_dets = 0
        unique_gt_ids = []
        unique_tracker_ids = []
        num_timesteps = raw_data['num_timesteps']

        # count detections
        for t in range(num_timesteps):
            num_gt_dets += len(raw_data['gt_dets'][t])
            num_tracker_dets += len(raw_data['tracker_dets'][t])
            unique_gt_ids += list(np.unique(raw_data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(raw_data['tracker_ids'][t]))

        data['gt_ids'] = raw_data['gt_ids']
        data['gt_dets'] = raw_data['gt_dets']
        data['similarity_scores'] = raw_data['similarity_scores']
        data['tracker_ids'] = raw_data['tracker_ids']

        # set void pixels in tracker detections to zero
        for t in range(num_timesteps):
            void_mask = raw_data['masks_void'][t]
            if mask_utils.area(void_mask) > 0:
                void_mask_ious = np.atleast_1d(mask_utils.iou(raw_data['tracker_dets'][t], [void_mask], [False]))
                if void_mask_ious.any():
                    rows, columns = np.where(void_mask_ious > 0)
                    for r in rows:
                        det = mask_utils.decode(raw_data['tracker_dets'][t][r])
                        void = mask_utils.decode(void_mask).astype(bool)
                        det[void] = 0
                        det = mask_utils.encode(np.array(det, order='F').astype(np.uint8))
                        raw_data['tracker_dets'][t][r] = det
        data['tracker_dets'] = raw_data['tracker_dets']

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = raw_data['num_tracker_ids']
        data['num_gt_ids'] = raw_data['num_gt_ids']
        data['mask_shape'] = raw_data['mask_shape']
        data['num_timesteps'] = num_timesteps
        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores
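Each DAVIS frame is a label image whose pixel values are object ids; _load_raw_file turns it into one RLE-encoded binary mask per id. A minimal standalone sketch of that step, using a toy 3x3 frame and assuming pycocotools is installed:

    import numpy as np
    from pycocotools import mask as mask_utils

    frame = np.array([[0, 1, 1], [2, 2, 0], [0, 0, 2]], dtype=np.uint8)
    ids = np.unique(frame)
    ids = ids[ids != 0]                          # drop background (id 0)
    # one binary HxW mask per id, stacked as (N, H, W), then encoded as HxWxN
    masks = (frame[None, ...] == ids[:, None, None]).astype(np.uint8)
    rles = mask_utils.encode(np.asfortranarray(np.transpose(masks, (1, 2, 0))))
    print(ids, [mask_utils.area(r) for r in rles])  # [1 2] [2, 3]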
@@ -0,0 +1,459 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException


class HeadTrackingChallenge(_BaseDataset):
    """Dataset class for Head Tracking Challenge - 2D bounding box tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['pedestrian'],  # Valid: ['pedestrian']
            'BENCHMARK': 'HT',  # Valid: 'HT'. Refers to "Head Tracking", i.e. the CroHD dataset.
            'SPLIT_TO_EVAL': 'train',  # Valid: 'train', 'test', 'all'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'DO_PREPROC': True,  # Whether to perform preprocessing (never done for MOT15)
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt',  # '{gt_folder}/{seq}/gt/gt.txt'
            'SKIP_SPLIT_FOL': False,  # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
                                      # TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
                                      # If True, then the middle 'benchmark-split' folder is skipped for both.
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())

        self.benchmark = self.config['BENCHMARK']
        gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
        self.gt_set = gt_set
        if not self.config['SKIP_SPLIT_FOL']:
            split_fol = gt_set
        else:
            split_fol = ''
        self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
        self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']
        self.do_preproc = self.config['DO_PREPROC']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
        self.class_name_to_class_id = {'pedestrian': 1, 'static': 2, 'ignore': 3, 'person_on_vehicle': 4}
        self.valid_class_numbers = list(self.class_name_to_class_id.values())

        # Get sequences to eval and check gt files exist
        self.seq_list, self.seq_lengths = self._get_seq_info()
        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                print('GT file not found ' + curr_file)
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _get_seq_info(self):
        seq_list = []
        seq_lengths = {}
        if self.config["SEQ_INFO"]:
            seq_list = list(self.config["SEQ_INFO"].keys())
            seq_lengths = self.config["SEQ_INFO"]

            # If a sequence length is 'None', try to read it from the seqinfo.ini file.
            for seq, seq_length in seq_lengths.items():
                if seq_length is None:
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])

        else:
            if self.config["SEQMAP_FILE"]:
                seqmap_file = self.config["SEQMAP_FILE"]
            else:
                if self.config["SEQMAP_FOLDER"] is None:
                    seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
                else:
                    seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
            if not os.path.isfile(seqmap_file):
                print('no seqmap found: ' + seqmap_file)
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                reader = csv.reader(fp)
                for i, row in enumerate(reader):
                    if i == 0 or row[0] == '':
                        continue
                    seq = row[0]
                    seq_list.append(seq)
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
        return seq_list, seq_lengths

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the MOT Challenge 2D box format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
        [gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """
        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
        else:
            data_keys += ['tracker_confidences']

        if self.benchmark == 'HT':
            data_keys += ['visibility', 'gt_conf']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t + 1) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) + ', ' for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t + 1)
            if time_key in read_data.keys():
                try:
                    time_data = np.asarray(read_data[time_key], dtype=float)
                except ValueError:
                    if is_gt:
                        raise TrackEvalException(
                            'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
                    else:
                        raise TrackEvalException(
                            'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
                                tracker, seq))
                try:
                    raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
                    raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
                except IndexError:
                    if is_gt:
                        err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                              'columns in the data.' % seq
                        raise TrackEvalException(err)
                    else:
                        err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
                              'columns in the data.' % (tracker, seq)
                        raise TrackEvalException(err)
                if time_data.shape[1] >= 8:
                    raw_data['gt_conf'][t] = np.atleast_1d(time_data[:, 6]).astype(float)
                    raw_data['visibility'][t] = np.atleast_1d(time_data[:, 8]).astype(float)
                    raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
                else:
                    if not is_gt:
                        raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
                    else:
                        raise TrackEvalException(
                            'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
                                seq, t))
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
            else:
                raw_data['dets'][t] = np.empty((0, 4))
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.empty(0)}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.empty(0)
            if is_gt:
                raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
             - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
             - cls is the class to be evaluated.
        Outputs:
             - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        MOT Challenge:
            In MOT Challenge, the 4 preproc steps are as follows:
                1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
                2) Predictions are matched against all gt boxes (regardless of class), and those matching with
                    distractor objects are removed.
                3) There are no crowd ignore regions.
                4) All gt dets except pedestrian are removed; pedestrian gt dets marked as zero_marked are also
                    removed.
        """
        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        # distractor classes: 'static': 2, 'ignore': 3, 'person_on_vehicle': 4
        distractor_class_names = ['static', 'ignore', 'person_on_vehicle']
        distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
        cls_id = self.class_name_to_class_id[cls]

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences',
                     'similarity_scores', 'gt_visibility']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Get all data
            gt_ids = raw_data['gt_ids'][t]
            gt_dets = raw_data['gt_dets'][t]
            gt_classes = raw_data['gt_classes'][t]
            gt_visibility = raw_data['visibility'][t]
            gt_conf = raw_data['gt_conf'][t]

            gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']

            tracker_ids = raw_data['tracker_ids'][t]
            tracker_dets = raw_data['tracker_dets'][t]
            tracker_classes = raw_data['tracker_classes'][t]
            tracker_confidences = raw_data['tracker_confidences'][t]
            similarity_scores = raw_data['similarity_scores'][t]

            # Evaluation is ONLY valid for pedestrian class
            if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
                raise TrackEvalException(
                    'Evaluation is only valid for pedestrian class. Non-pedestrian class (%i) found in sequence %s at '
                    'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))

            # Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
            # which are labeled as belonging to a distractor class.
            to_remove_tracker = np.array([], int)
            if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:

                # Check all classes are valid:
                invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
                if len(invalid_classes) > 0:
                    print(' '.join([str(x) for x in invalid_classes]))
                    raise TrackEvalException('Attempting to evaluate using invalid gt classes. '
                                             'This warning only triggers if preprocessing is performed, '
                                             'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
                                             'Please either check your gt data, or disable preprocessing. '
                                             'The following invalid classes were found in timestep ' + str(t) + ': ' +
                                             ' '.join([str(x) for x in invalid_classes]))

                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.4 - np.finfo('float').eps] = 0

                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_rows = match_rows[actually_matched_mask]
                match_cols = match_cols[actually_matched_mask]

                is_distractor_class = np.logical_not(np.isin(gt_classes[match_rows], cls_id))
                if self.benchmark == 'HT':
                    is_invisible_class = gt_visibility[match_rows] < np.finfo('float').eps
                    low_conf_class = gt_conf[match_rows] < np.finfo('float').eps
                    # combine all three conditions element-wise; note that calling
                    # np.logical_or with three positional arguments would treat the
                    # third one as the `out` array rather than as a third operand
                    are_distractors = is_invisible_class | is_distractor_class | low_conf_class
                    to_remove_tracker = match_cols[are_distractors]
                else:
                    to_remove_tracker = match_cols[is_distractor_class]

            # Apply preprocessing to remove all unwanted tracker dets.
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Remove gt detections marked as to remove (zero marked), and also remove gt detections not in the
            # pedestrian class.
            if self.do_preproc and self.benchmark == 'HT':
                gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
                                  (np.equal(gt_classes, cls_id)) & \
                                  (gt_visibility > 0.) & \
                                  (gt_conf > 0.)
            else:
                # There are no classes for MOT15
                gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
            data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
            data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
            data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
            data['gt_visibility'][t] = gt_visibility  # No mask!

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # Ensure again that ids are unique per timestep after preproc.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
        return similarity_scores
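The distractor-removal step above is plain maximum-IoU bipartite matching with a 0.4 floor. A toy, self-contained illustration of just that matching logic (the similarity matrix is made up for the example):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    similarity = np.array([[0.9, 0.1],
                           [0.3, 0.6]])
    scores = similarity.copy()
    scores[scores < 0.4 - np.finfo('float').eps] = 0   # zero out sub-threshold pairs
    rows, cols = linear_sum_assignment(-scores)        # maximize total score
    matched = scores[rows, cols] > np.finfo('float').eps
    print([(int(r), int(c)) for r, c in zip(rows[matched], cols[matched])])  # [(0, 0), (1, 1)]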
389
test/yolov7-tracker/tracker/trackeval/datasets/kitti_2d_box.py
Normal file
@@ -0,0 +1,389 @@
|
||||
|
||||
import os
|
||||
import csv
|
||||
import numpy as np
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from ._base_dataset import _BaseDataset
|
||||
from .. import utils
|
||||
from ..utils import TrackEvalException
|
||||
from .. import _timing
|
||||
|
||||
|
||||
class Kitti2DBox(_BaseDataset):
|
||||
"""Dataset class for KITTI 2D bounding box tracking"""
|
||||
|
||||
@staticmethod
|
||||
def get_default_dataset_config():
|
||||
"""Default class config values"""
|
||||
code_path = utils.get_code_path()
|
||||
default_config = {
|
||||
'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_2d_box_train'), # Location of GT data
|
||||
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_2d_box_train/'), # Trackers location
|
||||
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
|
||||
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
|
||||
'CLASSES_TO_EVAL': ['car', 'pedestrian'], # Valid: ['car', 'pedestrian']
|
||||
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val', 'training_minus_val', 'test'
|
||||
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
|
||||
'PRINT_CONFIG': True, # Whether to print current config
|
||||
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
|
||||
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
|
||||
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        self.max_occlusion = 2
        self.max_truncation = 0
        self.min_height = 25

        # Get classes to eval
        self.valid_classes = ['car', 'pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [car, pedestrian] are valid.')
        self.class_name_to_class_id = {'car': 1, 'van': 2, 'truck': 3, 'pedestrian': 4, 'person': 5,  # person sitting
                                       'cyclist': 6, 'tram': 7, 'misc': 8, 'dontcare': 9, 'car_2': 1}

        # Get sequences to eval and check gt files exist
        self.seq_list = []
        self.seq_lengths = {}
        seqmap_name = 'evaluate_tracking.seqmap.' + self.config['SPLIT_TO_EVAL']
        seqmap_file = os.path.join(self.gt_fol, seqmap_name)
        if not os.path.isfile(seqmap_file):
            raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
        with open(seqmap_file) as fp:
            dialect = csv.Sniffer().sniff(fp.read(1024))
            fp.seek(0)
            reader = csv.reader(fp, dialect)
            for row in reader:
                if len(row) >= 4:
                    seq = row[0]
                    self.seq_list.append(seq)
                    self.seq_lengths[seq] = int(row[3])
                    if not self.data_is_zipped:
                        curr_file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
                        if not os.path.isfile(curr_file):
                            raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the kitti 2D box format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
        [gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """
        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Ignore regions
        if is_gt:
            crowd_ignore_filter = {2: ['dontcare']}
        else:
            crowd_ignore_filter = None

        # Valid classes
        valid_filter = {2: [x for x in self.class_list]}
        if is_gt:
            if 'car' in self.class_list:
                valid_filter[2].append('van')
            if 'pedestrian' in self.class_list:
                valid_filter[2] += ['person']

        # Convert kitti class strings to class ids
        convert_filter = {2: self.class_name_to_class_id}

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, time_col=0, id_col=1, remove_negative_ids=True,
                                                             valid_filter=valid_filter,
                                                             crowd_ignore_filter=crowd_ignore_filter,
                                                             convert_filter=convert_filter,
                                                             is_zipped=self.data_is_zipped, zip_file=zip_file)
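        # Column layout of a KITTI tracking label line (per the public KITTI
        # tracking format; noted here for readability): 0 frame, 1 track id,
        # 2 type, 3 truncation, 4 occlusion, 5 alpha, 6-9 2D bbox (x0 y0 x1 y1),
        # 10-12 dimensions, 13-15 location, 16 rotation_y, 17 score (optional).
        # The slices below (6:10 for boxes, columns 3/4 for truncation/occlusion,
        # column 17 for confidence) follow this layout.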
        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
        else:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t)
            if time_key in read_data.keys():
                time_data = np.asarray(read_data[time_key], dtype=float)
                raw_data['dets'][t] = np.atleast_2d(time_data[:, 6:10])
                raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
                raw_data['classes'][t] = np.atleast_1d(time_data[:, 2]).astype(int)
                if is_gt:
                    gt_extras_dict = {'truncation': np.atleast_1d(time_data[:, 3].astype(int)),
                                      'occlusion': np.atleast_1d(time_data[:, 4].astype(int))}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    if time_data.shape[1] > 17:
                        raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 17])
                    else:
                        raw_data['tracker_confidences'][t] = np.ones(time_data.shape[0])
            else:
                raw_data['dets'][t] = np.empty((0, 4))
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if is_gt:
                    gt_extras_dict = {'truncation': np.empty(0),
                                      'occlusion': np.empty(0)}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.empty(0)
            if is_gt:
                if time_key in ignore_data.keys():
                    time_ignore = np.asarray(ignore_data[time_key], dtype=float)
                    raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 6:10])
                else:
                    raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        KITTI:
            In KITTI, the 4 preproc steps are as follows:
                1) There are two classes (pedestrian and car) which are evaluated separately.
                2) For the pedestrian class, the 'person' class (people sitting) contains the distractor objects.
                    For the car class, the 'van' class contains the distractor objects.
                    GT boxes marked as having an occlusion level > 2 or a truncation level > 0 are also treated as
                    distractors.
                3) Crowd ignore regions are used to remove unmatched detections. Also, unmatched detections with
                    height <= 25 pixels are removed.
                4) Distractor gt dets (including truncated and occluded ones) are removed.
        """
        if cls == 'pedestrian':
            distractor_classes = [self.class_name_to_class_id['person']]
        elif cls == 'car':
            distractor_classes = [self.class_name_to_class_id['van']]
        else:
            raise TrackEvalException('Class %s is not evaluatable' % cls)
        cls_id = self.class_name_to_class_id[cls]

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls + distractor classes)
            gt_class_mask = np.sum([raw_data['gt_classes'][t] == c for c in [cls_id] + distractor_classes], axis=0)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = raw_data['gt_dets'][t][gt_class_mask]
            gt_classes = raw_data['gt_classes'][t][gt_class_mask]
            gt_occlusion = raw_data['gt_extras'][t]['occlusion'][gt_class_mask]
            gt_truncation = raw_data['gt_extras'][t]['truncation'][gt_class_mask]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
            tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with the Hungarian algorithm) and remove tracker dets which match with gt dets
            # that are labeled as truncated, occluded, or belonging to a distractor class.
            to_remove_matched = np.array([], int)
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_rows = match_rows[actually_matched_mask]
                match_cols = match_cols[actually_matched_mask]

                is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
                is_occluded_or_truncated = np.logical_or(
                    gt_occlusion[match_rows] > self.max_occlusion + np.finfo('float').eps,
                    gt_truncation[match_rows] > self.max_truncation + np.finfo('float').eps)
                to_remove_matched = np.logical_or(is_distractor_class, is_occluded_or_truncated)
                to_remove_matched = match_cols[to_remove_matched]
                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            # For unmatched tracker dets, also remove those smaller than a minimum height.
            unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
            unmatched_heights = unmatched_tracker_dets[:, 3] - unmatched_tracker_dets[:, 1]
            is_too_small = unmatched_heights <= self.min_height + np.finfo('float').eps

            # For unmatched tracker dets, also remove those that are more than 50% within a crowd ignore region.
            crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
            intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
                                                                       box_format='x0y0x1y1', do_ioa=True)
            is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)

            # Apply preprocessing to remove all unwanted tracker dets.
            to_remove_unmatched = unmatched_indices[np.logical_or(is_too_small, is_within_crowd_ignore_region)]
            to_remove_tracker = np.concatenate((to_remove_matched, to_remove_unmatched), axis=0)
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Also remove gt dets that were only useful for preprocessing and are not needed for evaluation.
            # These are dets that are occluded, truncated, or from distractor objects.
            gt_to_keep_mask = (np.less_equal(gt_occlusion, self.max_occlusion)) & \
                              (np.less_equal(gt_truncation, self.max_truncation)) & \
                              (np.equal(gt_classes, cls_id))
            data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
            data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
            data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
        return similarity_scores
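
A minimal usage sketch of the class above (illustrative only: it assumes this class is exported as trackeval.datasets.Kitti2DBox, as in upstream TrackEval, that the trackeval package is importable, and that the paths are placeholders):

import trackeval

dataset = trackeval.datasets.Kitti2DBox({
    'GT_FOLDER': '/path/to/kitti/gt',        # must contain evaluate_tracking.seqmap.<split> and label_02/
    'TRACKERS_FOLDER': '/path/to/trackers',  # one sub-folder per tracker name
    'SPLIT_TO_EVAL': 'val',
    'CLASSES_TO_EVAL': ['car'],
})
raw = dataset.get_raw_seq_data('my_tracker', '0000')      # loads gt + tracker dets and computes box IoUs
data = dataset.get_preprocessed_seq_data(raw, cls='car')  # applies the 4 preproc steps described above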
426
test/yolov7-tracker/tracker/trackeval/datasets/kitti_mots.py
Normal file
@@ -0,0 +1,426 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException


class KittiMOTS(_BaseDataset):
    """Dataset class for KITTI MOTS tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_mots_val'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_mots_val'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['car', 'pedestrian'],  # Valid: ['car', 'pedestrian']
            'SPLIT_TO_EVAL': 'val',  # Valid: 'training', 'val'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if none use seqmap_folder/split_to_eval.seqmap)
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            'GT_LOC_FORMAT': '{gt_folder}/label_02/{seq}.txt',  # format of gt localization
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.split_to_eval = self.config['SPLIT_TO_EVAL']
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['car', 'pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. '
                                     'Only classes [car, pedestrian] are valid.')
        self.class_name_to_class_id = {'car': '1', 'pedestrian': '2', 'ignore': '10'}

        # Get sequences to eval and check gt files exist
        self.seq_list, self.seq_lengths = self._get_seq_info()
        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _get_seq_info(self):
        seq_list = []
        seq_lengths = {}
        seqmap_name = 'evaluate_mots.seqmap.' + self.config['SPLIT_TO_EVAL']

        if self.config["SEQ_INFO"]:
            seq_list = list(self.config["SEQ_INFO"].keys())
            seq_lengths = self.config["SEQ_INFO"]
        else:
            if self.config["SEQMAP_FILE"]:
                seqmap_file = self.config["SEQMAP_FILE"]
            else:
                if self.config["SEQMAP_FOLDER"] is None:
                    seqmap_file = os.path.join(self.config['GT_FOLDER'], seqmap_name)
                else:
                    seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], seqmap_name)
            if not os.path.isfile(seqmap_file):
                print('no seqmap found: ' + seqmap_file)
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                dialect = csv.Sniffer().sniff(fp.read(1024))
                fp.seek(0)
                reader = csv.reader(fp, dialect)
                for row in reader:
                    if len(row) >= 4:
                        seq = "%04d" % int(row[0])
                        seq_list.append(seq)
                        seq_lengths[seq] = int(row[3]) + 1
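                        # (A KITTI-style seqmap row looks like e.g. '0002 empty 000000 000232';
                        # row[3] is the index of the last frame, hence the +1 above. Example
                        # values are illustrative.)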
        return seq_list, seq_lengths

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the KITTI MOTS format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [gt_ignore_region]: list (for each timestep) of masks for the ignore regions

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Ignore regions
        if is_gt:
            crowd_ignore_filter = {2: ['10']}
        else:
            crowd_ignore_filter = None

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
                                                             is_zipped=self.data_is_zipped, zip_file=zip_file,
                                                             force_delimiters=' ')

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_ignore_region']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t)
            # list to collect all masks of a timestep to check for overlapping areas
            all_masks = []
            if time_key in read_data.keys():
                try:
                    raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
                                            'counts': region[5].encode(encoding='UTF-8')}
                                           for region in read_data[time_key]]
                    raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
                    raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
                    all_masks += raw_data['dets'][t]
                except IndexError:
                    self._raise_index_error(is_gt, tracker, seq)
                except ValueError:
                    self._raise_value_error(is_gt, tracker, seq)
            else:
                raw_data['dets'][t] = []
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
            if is_gt:
                if time_key in ignore_data.keys():
                    try:
                        time_ignore = [{'size': [int(region[3]), int(region[4])],
                                        'counts': region[5].encode(encoding='UTF-8')}
                                       for region in ignore_data[time_key]]
                        raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
                                                                           intersect=False)
                        all_masks += [raw_data['gt_ignore_region'][t]]
                    except IndexError:
                        self._raise_index_error(is_gt, tracker, seq)
                    except ValueError:
                        self._raise_value_error(is_gt, tracker, seq)
                else:
                    raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)

            # check for overlapping masks
            if all_masks:
                masks_merged = all_masks[0]
                for mask in all_masks[1:]:
                    if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
                        raise TrackEvalException(
                            'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
                                t))
                    masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data["num_timesteps"] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        KITTI MOTS:
            In KITTI MOTS, the 4 preproc steps are as follows:
                1) There are two classes (car and pedestrian) which are evaluated separately.
                2) There are no ground truth detections marked as to be removed/distractor classes.
                    Therefore also no matched tracker detections are removed.
                3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
                4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
        """
        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        cls_id = int(self.class_name_to_class_id[cls])

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
                            tracker_class_mask[ind]]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with the Hungarian algorithm)
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_cols = match_cols[actually_matched_mask]

                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            # For unmatched tracker dets, remove those that are more than 50% within a crowd ignore region.
            unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
            ignore_region = raw_data['gt_ignore_region'][t]
            intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
                                                                        is_encoded=True, do_ioa=True)
            is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)

            # Apply preprocessing to remove unwanted tracker dets.
            to_remove_tracker = unmatched_indices[is_within_ignore_region]
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Keep all ground truth detections
            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']
        data['cls'] = cls

        # Ensure again that ids are unique per timestep after preproc.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores

    @staticmethod
    def _raise_index_error(is_gt, tracker, seq):
        """
        Auxiliary method to raise an evaluation error in case of an index error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param tracker: the name of the tracker
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                  'columns in the data.' % seq
            raise TrackEvalException(err)
        else:
            err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
                  'columns in the data.' % (tracker, seq)
            raise TrackEvalException(err)

    @staticmethod
    def _raise_value_error(is_gt, tracker, seq):
        """
        Auxiliary method to raise an evaluation error in case of a value error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param tracker: the name of the tracker
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            raise TrackEvalException(
                'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
        else:
            raise TrackEvalException(
                'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
                'Is data corrupted?' % (tracker, seq))
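
To make the RLE handling above concrete, here is a small self-contained sketch (not part of the diff; requires numpy and pycocotools) of how the {'size': [h, w], 'counts': ...} dicts built from each 'time id class img_h img_w rle' line behave under the pycocotools mask utilities this class relies on:

import numpy as np
from pycocotools import mask as mask_utils

# Stand-in for one decoded detection: a 375x1242 frame with a filled rectangle.
# (Real masks come from the RLE string in column 5 of each text line.)
m = np.asfortranarray(np.zeros((375, 1242), dtype=np.uint8))  # pycocotools wants Fortran order
m[100:200, 300:500] = 1
rle = mask_utils.encode(m)  # -> {'size': [375, 1242], 'counts': b'...'}

print(mask_utils.area(rle))               # 20000 pixels (100 x 200 rectangle)
print(mask_utils.iou([rle], [rle], [0]))  # [[1.]]; iscrowd=1 would give IoA instead,
                                          # which is how ignore-region overlap is tested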
@@ -0,0 +1,437 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException


class MotChallenge2DBox(_BaseDataset):
    """Dataset class for MOT Challenge 2D bounding box tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['pedestrian'],  # Valid: ['pedestrian']
            'BENCHMARK': 'MOT17',  # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
            'SPLIT_TO_EVAL': 'train',  # Valid: 'train', 'test', 'all'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'DO_PREPROC': True,  # Whether to perform preprocessing (never done for MOT15)
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt',  # '{gt_folder}/{seq}/gt/gt.txt'
            'SKIP_SPLIT_FOL': False,  # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
                                      # TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
                                      # If True, then the middle 'benchmark-split' folder is skipped for both.
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())

        self.benchmark = self.config['BENCHMARK']
        gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
        self.gt_set = gt_set
        if not self.config['SKIP_SPLIT_FOL']:
            split_fol = gt_set
        else:
            split_fol = ''
        self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
        self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']
        self.do_preproc = self.config['DO_PREPROC']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
        self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
                                       'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
                                       'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
        self.valid_class_numbers = list(self.class_name_to_class_id.values())

        # Get sequences to eval and check gt files exist
        self.seq_list, self.seq_lengths = self._get_seq_info()
        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                print('GT file not found ' + curr_file)
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _get_seq_info(self):
        seq_list = []
        seq_lengths = {}
        if self.config["SEQ_INFO"]:
            seq_list = list(self.config["SEQ_INFO"].keys())
            seq_lengths = self.config["SEQ_INFO"]

            # If a sequence length is 'None', try to read the sequence length from the seqinfo.ini file.
            for seq, seq_length in seq_lengths.items():
                if seq_length is None:
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
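                    # (A typical MOT 'seqinfo.ini' looks like the following, with
                    # illustrative values; only the [Sequence]/seqLength entry is
                    # read here:
                    #   [Sequence]
                    #   name=MOT17-02-SDP
                    #   frameRate=30
                    #   seqLength=600)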

        else:
            if self.config["SEQMAP_FILE"]:
                seqmap_file = self.config["SEQMAP_FILE"]
            else:
                if self.config["SEQMAP_FOLDER"] is None:
                    seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
                else:
                    seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
            if not os.path.isfile(seqmap_file):
                print('no seqmap found: ' + seqmap_file)
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                reader = csv.reader(fp)
                for i, row in enumerate(reader):
                    if i == 0 or row[0] == '':
                        continue
                    seq = row[0]
                    seq_list.append(seq)
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
        return seq_list, seq_lengths

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the MOT Challenge 2D box format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
        [gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """
        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
        else:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t + 1) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t + 1)
            if time_key in read_data.keys():
                try:
                    time_data = np.asarray(read_data[time_key], dtype=float)
                except ValueError:
                    if is_gt:
                        raise TrackEvalException(
                            'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
                    else:
                        raise TrackEvalException(
                            'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
                                tracker, seq))
                try:
                    raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
                    raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
                except IndexError:
                    if is_gt:
                        err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                              'columns in the data.' % seq
                        raise TrackEvalException(err)
                    else:
                        err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
                              'columns in the data.' % (tracker, seq)
                        raise TrackEvalException(err)
                if time_data.shape[1] >= 8:
                    raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
                else:
                    if not is_gt:
                        raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
                    else:
                        raise TrackEvalException(
                            'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
                                seq, t))
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
            else:
                raw_data['dets'][t] = np.empty((0, 4))
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.empty(0)}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.empty(0)
            if is_gt:
                raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        MOT Challenge:
            In MOT Challenge, the 4 preproc steps are as follows:
                1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
                2) Predictions are matched against all gt boxes (regardless of class), and those matching with
                    distractor objects are removed.
                3) There are no crowd ignore regions.
                4) All gt dets except pedestrian are removed, as are pedestrian gt dets marked as zero_marked.
        """
        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
        if self.benchmark == 'MOT20':
            distractor_class_names.append('non_mot_vehicle')
        distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
        cls_id = self.class_name_to_class_id[cls]

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Get all data
            gt_ids = raw_data['gt_ids'][t]
            gt_dets = raw_data['gt_dets'][t]
            gt_classes = raw_data['gt_classes'][t]
            gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']

            tracker_ids = raw_data['tracker_ids'][t]
            tracker_dets = raw_data['tracker_dets'][t]
            tracker_classes = raw_data['tracker_classes'][t]
            tracker_confidences = raw_data['tracker_confidences'][t]
            similarity_scores = raw_data['similarity_scores'][t]

            # Evaluation is ONLY valid for pedestrian class
            if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
                raise TrackEvalException(
                    'Evaluation is only valid for pedestrian class. Non-pedestrian class (%i) found in sequence %s at '
                    'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))

            # Match tracker and gt dets (with the Hungarian algorithm) and remove tracker dets which match with gt dets
            # that are labeled as belonging to a distractor class.
            to_remove_tracker = np.array([], int)
            if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:

                # Check all classes are valid:
                invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
                if len(invalid_classes) > 0:
                    print(' '.join([str(x) for x in invalid_classes]))
                    raise TrackEvalException('Attempting to evaluate using invalid gt classes. '
                                             'This warning only triggers if preprocessing is performed, '
                                             'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
                                             'Please either check your gt data, or disable preprocessing. '
                                             'The following invalid classes were found in timestep ' + str(t) + ': ' +
                                             ' '.join([str(x) for x in invalid_classes]))

                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_rows = match_rows[actually_matched_mask]
                match_cols = match_cols[actually_matched_mask]

                is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
                to_remove_tracker = match_cols[is_distractor_class]

            # Apply preprocessing to remove all unwanted tracker dets.
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
            # class (not applicable for MOT15)
            if self.do_preproc and self.benchmark != 'MOT15':
                gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
                                  (np.equal(gt_classes, cls_id))
            else:
                # There are no classes for MOT15
                gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
            data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
            data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
            data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # Ensure again that ids are unique per timestep after preproc.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
        return similarity_scores
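
For context, a minimal end-to-end sketch of how this dataset class is typically driven (illustrative only: it assumes the standard TrackEval entry points trackeval.Evaluator, trackeval.datasets.MotChallenge2DBox and trackeval.metrics.HOTA/CLEAR/Identity, with placeholder paths):

import trackeval

evaluator = trackeval.Evaluator(trackeval.Evaluator.get_default_eval_config())
dataset = trackeval.datasets.MotChallenge2DBox({
    'GT_FOLDER': '/path/to/gt/mot_challenge/',  # placeholder
    'TRACKERS_FOLDER': '/path/to/trackers/',    # placeholder
    'BENCHMARK': 'MOT17',
    'SPLIT_TO_EVAL': 'train',
})
metrics = [trackeval.metrics.HOTA(), trackeval.metrics.CLEAR(), trackeval.metrics.Identity()]
results, messages = evaluator.evaluate([dataset], metrics)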
446
test/yolov7-tracker/tracker/trackeval/datasets/mots_challenge.py
Normal file
@@ -0,0 +1,446 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException


class MOTSChallenge(_BaseDataset):
    """Dataset class for MOTS Challenge tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['pedestrian'],  # Valid: ['pedestrian']
            'SPLIT_TO_EVAL': 'train',  # Valid: 'train', 'test'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if None, use seqmap_folder/MOTS-split_to_eval)
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt',  # '{gt_folder}/{seq}/gt/gt.txt'
            'SKIP_SPLIT_FOL': False,  # If False, data is in GT_FOLDER/MOTS-SPLIT_TO_EVAL/ and in
                                      # TRACKERS_FOLDER/MOTS-SPLIT_TO_EVAL/tracker/
                                      # If True, then the middle 'MOTS-split' folder is skipped for both.
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())

        self.benchmark = 'MOTS'
        self.gt_set = self.benchmark + '-' + self.config['SPLIT_TO_EVAL']
        if not self.config['SKIP_SPLIT_FOL']:
            split_fol = self.gt_set
        else:
            split_fol = ''
        self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
        self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only the pedestrian class is valid.')
        self.class_name_to_class_id = {'pedestrian': '2', 'ignore': '10'}

        # Get sequences to eval and check gt files exist
        self.seq_list, self.seq_lengths = self._get_seq_info()
        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                print('GT file not found ' + curr_file)
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _get_seq_info(self):
        seq_list = []
        seq_lengths = {}
        if self.config["SEQ_INFO"]:
            seq_list = list(self.config["SEQ_INFO"].keys())
            seq_lengths = self.config["SEQ_INFO"]

            # If a sequence length is None, try to read it from the seqinfo.ini file.
            for seq, seq_length in seq_lengths.items():
                if seq_length is None:
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])

        else:
            if self.config["SEQMAP_FILE"]:
                seqmap_file = self.config["SEQMAP_FILE"]
            else:
                if self.config["SEQMAP_FOLDER"] is None:
                    seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
                else:
                    seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
            if not os.path.isfile(seqmap_file):
                print('no seqmap found: ' + seqmap_file)
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                reader = csv.reader(fp)
                for i, row in enumerate(reader):
                    if i == 0 or row[0] == '':
                        continue
                    seq = row[0]
                    seq_list.append(seq)
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
        return seq_list, seq_lengths

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the MOTS Challenge format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [gt_ignore_region]: list (for each timestep) of masks for the ignore regions

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Ignore regions
        if is_gt:
            crowd_ignore_filter = {2: ['10']}
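            # i.e. gt rows whose class column (index 2) equals '10' -- the MOTS 'ignore'
            # class from class_name_to_class_id above -- are parsed as crowd ignore
            # regions rather than as regular detections.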
        else:
            crowd_ignore_filter = None

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
                                                             is_zipped=self.data_is_zipped, zip_file=zip_file,
                                                             force_delimiters=' ')

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_ignore_region']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t + 1) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t + 1)
            # list to collect all masks of a timestep to check for overlapping areas
            all_masks = []
            if time_key in read_data.keys():
                try:
                    raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
                                            'counts': region[5].encode(encoding='UTF-8')}
                                           for region in read_data[time_key]]
                    raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
                    raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
                    all_masks += raw_data['dets'][t]
                except IndexError:
                    self._raise_index_error(is_gt, tracker, seq)
                except ValueError:
                    self._raise_value_error(is_gt, tracker, seq)
            else:
                raw_data['dets'][t] = []
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
            if is_gt:
                if time_key in ignore_data.keys():
                    try:
                        time_ignore = [{'size': [int(region[3]), int(region[4])],
                                        'counts': region[5].encode(encoding='UTF-8')}
                                       for region in ignore_data[time_key]]
                        raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
                                                                           intersect=False)
                        all_masks += [raw_data['gt_ignore_region'][t]]
                    except IndexError:
                        self._raise_index_error(is_gt, tracker, seq)
                    except ValueError:
                        self._raise_value_error(is_gt, tracker, seq)
                else:
                    raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)

            # check for overlapping masks
            if all_masks:
                masks_merged = all_masks[0]
                for mask in all_masks[1:]:
                    if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
                        raise TrackEvalException(
                            'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
                                t))
                    masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        MOTS Challenge:
            In MOTS Challenge, the 4 preproc steps are as follows:
                1) There is only one class (pedestrian) to be evaluated.
                2) There are no ground truth detections marked as to be removed/distractor classes.
                    Therefore also no matched tracker detections are removed.
                3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
                4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
        """
        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        cls_id = int(self.class_name_to_class_id[cls])

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
                            tracker_class_mask[ind]]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with hungarian algorithm)
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_cols = match_cols[actually_matched_mask]

                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            # For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
            unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
            ignore_region = raw_data['gt_ignore_region'][t]
            intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
                                                                        is_encoded=True, do_ioa=True)
            is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)

            # Apply preprocessing to remove unwanted tracker dets.
            to_remove_tracker = unmatched_indices[is_within_ignore_region]
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Keep all ground truth detections
            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # Ensure again that ids are unique per timestep after preproc.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores

    @staticmethod
    def _raise_index_error(is_gt, tracker, seq):
        """
        Auxiliary method to raise an evaluation error in case of an index error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param tracker: the name of the tracker
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                  'columns in the data.' % seq
            raise TrackEvalException(err)
        else:
            err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
                  'columns in the data.' % (tracker, seq)
            raise TrackEvalException(err)

    @staticmethod
    def _raise_value_error(is_gt, tracker, seq):
        """
        Auxiliary method to raise an evaluation error in case of a value error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param tracker: the name of the tracker
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            raise TrackEvalException(
                'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
        else:
            raise TrackEvalException(
                'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
                'Is data corrupted?' % (tracker, seq))
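
MOTSChallenge scores similarity as RLE mask IoU (_calculate_mask_ious with is_encoded=True above). A minimal sketch of the underlying pycocotools calls, with two invented masks (an illustration, not the repo's implementation):

import numpy as np
from pycocotools import mask as mask_utils

# Encode two toy binary masks as RLE, the same representation as in the MOTS txt format.
a = mask_utils.encode(np.asfortranarray(np.triu(np.ones((8, 8), dtype=np.uint8))))
b = mask_utils.encode(np.asfortranarray(np.ones((8, 8), dtype=np.uint8)))

print(mask_utils.iou([a], [b], [False]))  # plain IoU: 36 / 64
print(mask_utils.iou([a], [b], [True]))   # 'crowd' mode, i.e. intersection over area of a: 1.0
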
452
test/yolov7-tracker/tracker/trackeval/datasets/person_path_22.py
Normal file
@@ -0,0 +1,452 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException

class PersonPath22(_BaseDataset):
    """Dataset class for MOT Challenge 2D bounding box tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/person_path_22/'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/person_path_22/'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': ['pedestrian'],  # Valid: ['pedestrian']
            'BENCHMARK': 'person_path_22',  # Valid: 'person_path_22'
            'SPLIT_TO_EVAL': 'test',  # Valid: 'train', 'test', 'all'
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'DO_PREPROC': True,  # Whether to perform preprocessing
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if None, use seqmap_folder/benchmark-split_to_eval)
            'SEQ_INFO': None,  # If not None, directly specify sequences to eval and their number of timesteps
            'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt',  # '{gt_folder}/{seq}/gt/gt.txt'
            'SKIP_SPLIT_FOL': False,  # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
                                      # TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
                                      # If True, then the middle 'benchmark-split' folder is skipped for both.
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())

        self.benchmark = self.config['BENCHMARK']
        gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
        self.gt_set = gt_set
        if not self.config['SKIP_SPLIT_FOL']:
            split_fol = gt_set
        else:
            split_fol = ''
        self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
        self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
        self.should_classes_combine = False
        self.use_super_categories = False
        self.data_is_zipped = self.config['INPUT_AS_ZIP']
        self.do_preproc = self.config['DO_PREPROC']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        # Get classes to eval
        self.valid_classes = ['pedestrian']
        self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                           for cls in self.config['CLASSES_TO_EVAL']]
        if not all(self.class_list):
            raise TrackEvalException('Attempted to evaluate an invalid class. Only the pedestrian class is valid.')
        self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
                                       'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
                                       'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
        self.valid_class_numbers = list(self.class_name_to_class_id.values())

        # Get sequences to eval and check gt files exist
        self.seq_list, self.seq_lengths = self._get_seq_info()
        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, 'data.zip')
            if not os.path.isfile(curr_file):
                print('GT file not found ' + curr_file)
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
                if not os.path.isfile(curr_file):
                    print('Tracker file not found: ' + curr_file)
                    raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
                                curr_file))

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _get_seq_info(self):
        seq_list = []
        seq_lengths = {}
        if self.config["SEQ_INFO"]:
            seq_list = list(self.config["SEQ_INFO"].keys())
            seq_lengths = self.config["SEQ_INFO"]

            # If a sequence length is None, try to read it from the seqinfo.ini file.
            for seq, seq_length in seq_lengths.items():
                if seq_length is None:
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])

        else:
            if self.config["SEQMAP_FILE"]:
                seqmap_file = self.config["SEQMAP_FILE"]
            else:
                if self.config["SEQMAP_FOLDER"] is None:
                    seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
                else:
                    seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
            if not os.path.isfile(seqmap_file):
                print('no seqmap found: ' + seqmap_file)
                raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
            with open(seqmap_file) as fp:
                reader = csv.reader(fp)
                for i, row in enumerate(reader):
                    if i == 0 or row[0] == '':
                        continue
                    seq = row[0]
                    seq_list.append(seq)
                    ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
                    if not os.path.isfile(ini_file):
                        raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
                    ini_data = configparser.ConfigParser()
                    ini_data.read(ini_file)
                    seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
        return seq_list, seq_lengths

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the MOT Challenge 2D box format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
        [gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        """
        # File location
        if self.data_is_zipped:
            if is_gt:
                zip_file = os.path.join(self.gt_fol, 'data.zip')
            else:
                zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
            file = seq + '.txt'
        else:
            zip_file = None
            if is_gt:
                file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
            else:
                file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')

        # Ignore regions
        if is_gt:
            crowd_ignore_filter = {7: ['13']}
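            # i.e. gt rows whose class column (index 7) equals '13' -- the 'crowd'
            # class from class_name_to_class_id above -- are read as crowd ignore
            # regions rather than as regular gt detections.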
        else:
            crowd_ignore_filter = None

        # Load raw data from text file
        read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file, crowd_ignore_filter=crowd_ignore_filter)

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq]
        data_keys = ['ids', 'classes', 'dets']
        if is_gt:
            data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
        else:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}

        # Check for any extra time keys
        current_time_keys = [str(t + 1) for t in range(num_timesteps)]
        extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
        if len(extra_time_keys) > 0:
            if is_gt:
                text = 'Ground-truth'
            else:
                text = 'Tracking'
            raise TrackEvalException(
                text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
                    [str(x) for x in extra_time_keys]))

        for t in range(num_timesteps):
            time_key = str(t + 1)
            if time_key in read_data.keys():
                try:
                    time_data = np.asarray(read_data[time_key], dtype=float)
                except ValueError:
                    if is_gt:
                        raise TrackEvalException(
                            'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
                    else:
                        raise TrackEvalException(
                            'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
                                tracker, seq))
                try:
                    raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
                    raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
                except IndexError:
                    if is_gt:
                        err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                              'columns in the data.' % seq
                        raise TrackEvalException(err)
                    else:
                        err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
                              'columns in the data.' % (tracker, seq)
                        raise TrackEvalException(err)
                if time_data.shape[1] >= 8:
                    raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
                else:
                    if not is_gt:
                        raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
                    else:
                        raise TrackEvalException(
                            'GT data is not in a valid format, there are not enough rows in seq %s, timestep %i.' % (
                                seq, t))
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
            else:
                raw_data['dets'][t] = np.empty((0, 4))
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if is_gt:
                    gt_extras_dict = {'zero_marked': np.empty(0)}
                    raw_data['gt_extras'][t] = gt_extras_dict
                else:
                    raw_data['tracker_confidences'][t] = np.empty(0)
            if is_gt:
                if time_key in ignore_data.keys():
                    time_ignore = np.asarray(ignore_data[time_key], dtype=float)
                    raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 2:6])
                else:
                    raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)
        raw_data['num_timesteps'] = num_timesteps
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
                and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
                unique within each timestep.

        MOT Challenge:
            In MOT Challenge, the 4 preproc steps are as follows:
                1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
                2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
                    objects are removed.
                3) Crowd ignore regions are used to remove unmatched tracker dets (at least 95% overlap with an
                    ignore region).
                4) All gt dets except pedestrian are removed, also removes pedestrian gt dets marked with zero_marked.
        """
        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
        if self.benchmark == 'MOT20':
            distractor_class_names.append('non_mot_vehicle')
        distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
        cls_id = self.class_name_to_class_id[cls]

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Get all data
            gt_ids = raw_data['gt_ids'][t]
            gt_dets = raw_data['gt_dets'][t]
            gt_classes = raw_data['gt_classes'][t]
            gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']

            tracker_ids = raw_data['tracker_ids'][t]
            tracker_dets = raw_data['tracker_dets'][t]
            tracker_classes = raw_data['tracker_classes'][t]
            tracker_confidences = raw_data['tracker_confidences'][t]
            similarity_scores = raw_data['similarity_scores'][t]
            crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]

            # Evaluation is ONLY valid for pedestrian class
            if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
                raise TrackEvalException(
                    'Evaluation is only valid for pedestrian class. Non-pedestrian class (%i) found in sequence %s at '
                    'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))

            # Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
            # which are labeled as belonging to a distractor class.
            to_remove_tracker = np.array([], int)
            if self.do_preproc and self.benchmark != 'MOT15' and (gt_ids.shape[0] > 0 or len(crowd_ignore_regions) > 0) and tracker_ids.shape[0] > 0:

                # Check all classes are valid:
                invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
                if len(invalid_classes) > 0:
                    print(' '.join([str(x) for x in invalid_classes]))
                    raise TrackEvalException('Attempting to evaluate using invalid gt classes. '
                                             'This error only triggers if preprocessing is performed, '
                                             'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
                                             'Please either check your gt data, or disable preprocessing. '
                                             'The following invalid classes were found in timestep ' + str(t) + ': ' +
                                             ' '.join([str(x) for x in invalid_classes]))

                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_rows = match_rows[actually_matched_mask]
                match_cols = match_cols[actually_matched_mask]

                is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
                to_remove_tracker = match_cols[is_distractor_class]

                # remove bounding boxes that overlap with crowd ignore region.
                intersection_with_ignore_region = self._calculate_box_ious(tracker_dets, crowd_ignore_regions, box_format='xywh', do_ioa=True)
                is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.95 + np.finfo('float').eps, axis=1)
                to_remove_tracker = np.unique(np.concatenate([to_remove_tracker, np.where(is_within_crowd_ignore_region)[0]]))

            # Apply preprocessing to remove all unwanted tracker dets.
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
            # class (not applicable for MOT15)
            if self.do_preproc and self.benchmark != 'MOT15':
                gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
                                  (np.equal(gt_classes, cls_id))
            else:
                # There are no classes for MOT15
                gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
            data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
            data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
            data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # Ensure again that ids are unique per timestep after preproc.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
        return similarity_scores
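
The crowd-ignore filtering above drops a tracker box once its intersection with an ignore region exceeds 95% of the box's own area. A self-contained sketch of that 'xywh' intersection-over-area computation (a hypothetical helper, not the repo's _calculate_box_ious):

import numpy as np

def ioa_xywh(dets, regions):
    """Intersection of each [x, y, w, h] det with each region, over the det's own area."""
    dets = np.asarray(dets, dtype=float)
    regions = np.asarray(regions, dtype=float)
    x1 = np.maximum(dets[:, None, 0], regions[None, :, 0])
    y1 = np.maximum(dets[:, None, 1], regions[None, :, 1])
    x2 = np.minimum(dets[:, None, 0] + dets[:, None, 2], regions[None, :, 0] + regions[None, :, 2])
    y2 = np.minimum(dets[:, None, 1] + dets[:, None, 3], regions[None, :, 1] + regions[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    areas = np.maximum(dets[:, 2] * dets[:, 3], np.finfo(float).eps)
    return inter / areas[:, None]

# A det fully inside an ignore region scores 1.0 and would be removed at the 0.95 threshold.
print(ioa_xywh([[10, 10, 5, 5]], [[0, 0, 100, 100]]))  # [[1.]]
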
508
test/yolov7-tracker/tracker/trackeval/datasets/rob_mots.py
Normal file
@@ -0,0 +1,508 @@

import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from ..utils import TrackEvalException
from .. import _timing
from ..datasets.rob_mots_classmap import cls_id_to_name


class RobMOTS(_BaseDataset):

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'SUB_BENCHMARK': None,  # REQUIRED. Sub-benchmark to eval. If None, then error.
            # ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
            'CLASSES_TO_EVAL': None,  # List of classes to eval. If None, then it does all COCO classes.
            'SPLIT_TO_EVAL': 'train',  # valid: ['train', 'val', 'test']
            'INPUT_AS_ZIP': False,  # Whether tracker input files are zipped
            'PRINT_CONFIG': True,  # Whether to print current config
            'OUTPUT_SUB_FOLDER': 'results',  # Output files are saved in OUTPUT_FOLDER/DATA_LOC_FORMAT/OUTPUT_SUB_FOLDER
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/DATA_LOC_FORMAT/TRACKER_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'SEQMAP_FOLDER': None,  # Where seqmaps are found (if None, GT_FOLDER/dataset_subfolder/seqmaps)
            'SEQMAP_FILE': None,  # Directly specify seqmap file (if None, use SEQMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
            'CLSMAP_FOLDER': None,  # Where clsmaps are found (if None, GT_FOLDER/dataset_subfolder/clsmaps)
            'CLSMAP_FILE': None,  # Directly specify clsmap file (if None, use CLSMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config())

        self.split = self.config['SPLIT_TO_EVAL']
        valid_benchmarks = ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
        self.box_gt_benchmarks = ['waymo', 'tao']

        self.sub_benchmark = self.config['SUB_BENCHMARK']
        if not self.sub_benchmark:
            raise TrackEvalException('SUB_BENCHMARK config input is required (there is no default value). Only '
                                     'benchmarks ' + ', '.join(valid_benchmarks) + ' are valid.')
        if self.sub_benchmark not in valid_benchmarks:
            raise TrackEvalException('Attempted to evaluate an invalid benchmark: ' + self.sub_benchmark +
                                     '. Only benchmarks ' + ', '.join(valid_benchmarks) + ' are valid.')

        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], self.config['SPLIT_TO_EVAL'])
        self.data_is_zipped = self.config['INPUT_AS_ZIP']

        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_sub_fol = os.path.join(self.config['OUTPUT_SUB_FOLDER'], self.sub_benchmark)

        # Reads in the seqmap to collect info on all sequences to eval.
        self._get_seq_info()

        if len(self.seq_list) < 1:
            raise TrackEvalException('No sequences are selected to be evaluated.')

        valid_class_ids = np.atleast_1d(np.genfromtxt(os.path.join(self.gt_fol, self.split, self.sub_benchmark,
                                                                   'clsmap.txt')))
        valid_classes = [cls_id_to_name[int(x)] for x in valid_class_ids] + ['all']
        self.valid_class_ids = valid_class_ids
        self.class_name_to_class_id = {cls_name: cls_id for cls_id, cls_name in cls_id_to_name.items()}
        self.class_name_to_class_id['all'] = -1
        if not self.config['CLASSES_TO_EVAL']:
            self.class_list = valid_classes
        else:
            self.class_list = [cls if cls in valid_classes else None
                               for cls in self.config['CLASSES_TO_EVAL']]
            if not all(self.class_list):
                raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
                                         ', '.join(valid_classes) + ' are valid.')

        # Check gt files exist
        for seq in self.seq_list:
            if not self.data_is_zipped:
                curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
                if not os.path.isfile(curr_file):
                    print('GT file not found ' + curr_file)
                    raise TrackEvalException('GT file not found for sequence: ' + seq)
        if self.data_is_zipped:
            curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
            if not os.path.isfile(curr_file):
                raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        for tracker in self.tracker_list:
            if self.data_is_zipped:
                curr_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
                if not os.path.isfile(curr_file):
                    raise TrackEvalException('Tracker file not found: ' + os.path.basename(curr_file))
            else:
                for seq in self.seq_list:
                    curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq
                                             + '.txt')
                    if not os.path.isfile(curr_file):
                        print('Tracker file not found: ' + curr_file)
                        raise TrackEvalException(
                            'Tracker file not found: ' + self.sub_benchmark + '/' + os.path.basename(curr_file))

    def get_name(self):
        return self.get_class_name() + '.' + self.sub_benchmark

    def _get_seq_info(self):
        self.seq_list = []
        self.seq_lengths = {}
        self.seq_sizes = {}
        self.seq_ignore_class_ids = {}
        if self.config["SEQMAP_FILE"]:
            seqmap_file = self.config["SEQMAP_FILE"]
        else:
            if self.config["SEQMAP_FOLDER"] is None:
                seqmap_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'seqmap.txt')
            else:
                seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.split + '.seqmap')
        if not os.path.isfile(seqmap_file):
            print('no seqmap found: ' + seqmap_file)
            raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
        with open(seqmap_file) as fp:
            dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
            fp.seek(0)
            reader = csv.reader(fp, dialect)
            for i, row in enumerate(reader):
                if len(row) >= 4:
                    # first col: sequence, second col: sequence length, third and fourth col: sequence height/width
                    # The rest of the columns list the 'sequence ignore class ids' which are classes not penalized as
                    # FPs for this sequence.
                    seq = row[0]
                    self.seq_list.append(seq)
                    self.seq_lengths[seq] = int(row[1])
                    self.seq_sizes[seq] = (int(row[2]), int(row[3]))
                    self.seq_ignore_class_ids[seq] = [int(row[x]) for x in range(4, len(row))]
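                    # For illustration, a hypothetical seqmap row
                    #   seq_0001 300 480 640 21 22
                    # yields seq_lengths['seq_0001'] = 300, seq_sizes['seq_0001'] = (480, 640)
                    # and seq_ignore_class_ids['seq_0001'] = [21, 22], i.e. classes 21 and 22
                    # are not penalized as FPs for this sequence.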
|
||||
def get_display_name(self, tracker):
|
||||
return self.tracker_to_disp[tracker]
|
||||
|
||||
def _load_raw_file(self, tracker, seq, is_gt):
|
||||
"""Load a file (gt or tracker) in the unified RobMOTS format.
|
||||
|
||||
If is_gt, this returns a dict which contains the fields:
|
||||
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
|
||||
|
||||
if not is_gt, this returns a dict which contains the fields:
|
||||
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[tracker_dets]: list (for each timestep) of lists of detections.
|
||||
"""
|
||||
# import to reduce minimum requirements
|
||||
from pycocotools import mask as mask_utils
|
||||
|
||||
# File location
|
||||
if self.data_is_zipped:
|
||||
if is_gt:
|
||||
zip_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
|
||||
else:
|
||||
zip_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
|
||||
file = seq + '.txt'
|
||||
else:
|
||||
zip_file = None
|
||||
if is_gt:
|
||||
file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
|
||||
else:
|
||||
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq + '.txt')
|
||||
|
||||
# Load raw data from text file
|
||||
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file,
|
||||
force_delimiters=' ')
|
||||
|
||||
# Convert data to required format
|
||||
num_timesteps = self.seq_lengths[seq]
|
||||
data_keys = ['ids', 'classes', 'dets']
|
||||
if not is_gt:
|
||||
data_keys += ['tracker_confidences']
|
||||
raw_data = {key: [None] * num_timesteps for key in data_keys}
|
||||
for t in range(num_timesteps):
|
||||
time_key = str(t)
|
||||
# list to collect all masks of a timestep to check for overlapping areas (for segmentation datasets)
|
||||
all_valid_masks = []
|
||||
if time_key in read_data.keys():
|
||||
try:
|
||||
raw_data['ids'][t] = np.atleast_1d([det[1] for det in read_data[time_key]]).astype(int)
|
||||
raw_data['classes'][t] = np.atleast_1d([det[2] for det in read_data[time_key]]).astype(int)
|
||||
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
|
||||
raw_data['dets'][t] = [{'size': [int(region[4]), int(region[5])],
|
||||
'counts': region[6].encode(encoding='UTF-8')}
|
||||
for region in read_data[time_key]]
|
||||
all_valid_masks += [mask for mask, cls in zip(raw_data['dets'][t], raw_data['classes'][t]) if
|
||||
cls < 100]
|
||||
else:
|
||||
raw_data['dets'][t] = np.atleast_2d([det[4:8] for det in read_data[time_key]]).astype(float)
|
||||
|
||||
if not is_gt:
|
||||
raw_data['tracker_confidences'][t] = np.atleast_1d([det[3] for det
|
||||
in read_data[time_key]]).astype(float)
|
||||
except IndexError:
|
||||
self._raise_index_error(is_gt, self.sub_benchmark, seq)
|
||||
except ValueError:
|
||||
self._raise_value_error(is_gt, self.sub_benchmark, seq)
|
||||
# no detection in this timestep
|
||||
else:
|
||||
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
|
||||
raw_data['dets'][t] = []
|
||||
else:
|
||||
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
|
||||
raw_data['ids'][t] = np.empty(0).astype(int)
|
||||
raw_data['classes'][t] = np.empty(0).astype(int)
|
||||
if not is_gt:
|
||||
raw_data['tracker_confidences'][t] = np.empty(0).astype(float)
|
||||
|
||||
# check for overlapping masks
|
||||
if all_valid_masks:
|
||||
masks_merged = all_valid_masks[0]
|
||||
for mask in all_valid_masks[1:]:
|
||||
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
|
||||
err = 'Overlapping masks in frame %d' % t
|
||||
raise TrackEvalException(err)
|
||||
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
|
||||
|
||||
if is_gt:
|
||||
key_map = {'ids': 'gt_ids',
|
||||
'classes': 'gt_classes',
|
||||
'dets': 'gt_dets'}
|
||||
else:
|
||||
key_map = {'ids': 'tracker_ids',
|
||||
'classes': 'tracker_classes',
|
||||
'dets': 'tracker_dets'}
|
||||
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
|
||||
raw_data['num_timesteps'] = num_timesteps
|
||||
raw_data['frame_size'] = self.seq_sizes[seq]
|
||||
raw_data['seq'] = seq
|
||||
return raw_data

    @staticmethod
    def _raise_index_error(is_gt, sub_benchmark, seq):
        """
        Auxiliary method to raise an evaluation error in case of an index error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param sub_benchmark: the name of the sub benchmark
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            err = 'Cannot load gt data from sequence %s, because there are not enough ' \
                  'columns in the data.' % seq
            raise TrackEvalException(err)
        else:
            err = 'Cannot load tracker data from benchmark %s, sequence %s, because there are not enough ' \
                  'columns in the data.' % (sub_benchmark, seq)
            raise TrackEvalException(err)

    @staticmethod
    def _raise_value_error(is_gt, sub_benchmark, seq):
        """
        Auxiliary method to raise an evaluation error in case of a value error while reading files.
        :param is_gt: whether gt or tracker data is read
        :param sub_benchmark: the name of the sub benchmark
        :param seq: the name of the seq
        :return: None
        """
        if is_gt:
            raise TrackEvalException(
                'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
        else:
            raise TrackEvalException(
                'Tracking data from benchmark %s, sequence %s cannot be converted to the right format. '
                'Is data corrupted?' % (sub_benchmark, seq))

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            Preprocessing (preproc) occurs in 3 steps.
                1) Extract only detections relevant for the class to be evaluated.
                2) Match gt dets and tracker dets. Tracker dets that are matched to a gt det (TPs) are marked as not
                    to be removed.
                3) Remove unmatched tracker dets if they fall within an ignore region or are too small, or if that
                    class is marked as an ignore class for that sequence.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
            and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
            unique within each timestep.
            Note that there is a special 'all' class, which evaluates all of the COCO classes together in a
            'class agnostic' fashion.
        """
        # import to reduce minimum requirements
        from pycocotools import mask as mask_utils

        # Check that input data has unique ids
        self._check_unique_ids(raw_data)

        cls_id = self.class_name_to_class_id[cls]
        ignore_class_id = cls_id + 100
        seq = raw_data['seq']

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0

        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class
            if cls == 'all':
                gt_class_mask = raw_data['gt_classes'][t] < 100
            # For waymo, combine predictions for [car, truck, bus, motorcycle] into car, because they are all annotated
            # together as one 'vehicle' class.
            elif self.sub_benchmark == 'waymo' and cls == 'car':
                waymo_vehicle_classes = np.array([3, 4, 6, 8])
                gt_class_mask = np.isin(raw_data['gt_classes'][t], waymo_vehicle_classes)
            else:
                gt_class_mask = raw_data['gt_classes'][t] == cls_id
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            if cls == 'all':
                ignore_regions_mask = raw_data['gt_classes'][t] >= 100
            else:
                ignore_regions_mask = raw_data['gt_classes'][t] == ignore_class_id
                ignore_regions_mask = np.logical_or(ignore_regions_mask, raw_data['gt_classes'][t] == 100)
            if self.sub_benchmark in self.box_gt_benchmarks:
                gt_dets = raw_data['gt_dets'][t][gt_class_mask]
                ignore_regions_box = raw_data['gt_dets'][t][ignore_regions_mask]
                if len(ignore_regions_box) > 0:
                    ignore_regions_box[:, 2] = ignore_regions_box[:, 2] - ignore_regions_box[:, 0]
                    ignore_regions_box[:, 3] = ignore_regions_box[:, 3] - ignore_regions_box[:, 1]
                    ignore_regions = mask_utils.frPyObjects(ignore_regions_box, self.seq_sizes[seq][0],
                                                            self.seq_sizes[seq][1])
                else:
                    ignore_regions = []
            else:
                gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
                ignore_regions = [raw_data['gt_dets'][t][ind] for ind in range(len(ignore_regions_mask)) if
                                  ignore_regions_mask[ind]]

            if cls == 'all':
                tracker_class_mask = np.ones_like(raw_data['tracker_classes'][t])
            else:
                tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
                            tracker_class_mask[ind]]
            tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
            tracker_classes = raw_data['tracker_classes'][t][tracker_class_mask]

            # Only do preproc if there are ignore regions defined to remove
            if tracker_ids.shape[0] > 0:

                # Match tracker and gt dets (with hungarian algorithm)
                unmatched_indices = np.arange(tracker_ids.shape[0])
                if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                    matching_scores = similarity_scores.copy()
                    matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                    match_rows, match_cols = linear_sum_assignment(-matching_scores)
                    actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                    # match_rows = match_rows[actually_matched_mask]
                    match_cols = match_cols[actually_matched_mask]
                    unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
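                # Note: similarities below 0.5 IoU are zeroed before the Hungarian assignment, so
                # linear_sum_assignment cannot create matches under the threshold; the eps terms
                # only guard the floating-point comparisons. Matched tracker columns are dropped
                # from `unmatched_indices`, which from here on indexes only unmatched dets.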

                # For unmatched tracker dets remove those that are greater than 50% within an ignore region.
                # unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
                # crowd_ignore_regions = raw_data['gt_ignore_regions'][t]
                # intersection_with_ignore_region = self. \
                #     _calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions, box_format='x0y0x1y1',
                #                         do_ioa=True)

                if cls_id in self.seq_ignore_class_ids[seq]:
                    # Remove unmatched detections for classes that are marked as 'ignore' for the whole sequence.
                    to_remove_tracker = unmatched_indices
                else:
                    unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if
                                              i in unmatched_indices]

                    # For unmatched tracker dets remove those that are too small.
                    tracker_boxes_t = mask_utils.toBbox(unmatched_tracker_dets)
                    unmatched_widths = tracker_boxes_t[:, 2]
                    unmatched_heights = tracker_boxes_t[:, 3]
                    unmatched_size = np.maximum(unmatched_heights, unmatched_widths)
                    min_size = np.min(self.seq_sizes[seq]) / 8
                    is_too_small = unmatched_size <= min_size + np.finfo('float').eps
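                    # Note: a detection counts as 'too small' when its larger box side is at most
                    # one eighth of the smaller frame dimension, presumably because very small
                    # objects are not exhaustively annotated in the ground truth.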

                    # For unmatched tracker dets remove those that are greater than 50% within an ignore region.
                    if ignore_regions:
                        ignore_region_merged = ignore_regions[0]
                        for mask in ignore_regions[1:]:
                            ignore_region_merged = mask_utils.merge([ignore_region_merged, mask], intersect=False)
                        intersection_with_ignore_region = self. \
                            _calculate_mask_ious(unmatched_tracker_dets, [ignore_region_merged], is_encoded=True,
                                                 do_ioa=True)
                        is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps,
                                                         axis=1)
                        to_remove_tracker = unmatched_indices[np.logical_or(is_too_small, is_within_ignore_region)]
                    else:
                        to_remove_tracker = unmatched_indices[is_too_small]
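                    # Note: do_ioa=True computes intersection-over-area rather than IoU, i.e. the
                    # fraction of each unmatched detection that lies inside the merged ignore
                    # region; detections more than 50% covered are discarded.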

                # For the special 'all' class, you need to remove unmatched detections from all ignore classes and
                # non-evaluated classes.
                if cls == 'all':
                    unmatched_tracker_classes = [tracker_classes[i] for i in range(len(tracker_classes)) if
                                                 i in unmatched_indices]
                    is_ignore_class = np.isin(unmatched_tracker_classes, self.seq_ignore_class_ids[seq])
                    is_not_evaled_class = np.logical_not(np.isin(unmatched_tracker_classes, self.valid_class_ids))
                    to_remove_all = unmatched_indices[np.logical_or(is_ignore_class, is_not_evaled_class)]
                    to_remove_tracker = np.concatenate([to_remove_tracker, to_remove_all], axis=0)
            else:
                to_remove_tracker = np.array([], dtype=int)

            # remove all unwanted tracker detections
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            # keep all ground truth detections
            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
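        # Note: the id maps are NaN-initialised lookup tables; only ids actually seen get a
        # contiguous index, so a NaN surfacing later would indicate an id that was never observed.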
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']
        data['frame_size'] = raw_data['frame_size']

        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data, after_preproc=True)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        if self.sub_benchmark in self.box_gt_benchmarks:
            # Convert tracker masks to bboxes (for benchmarks with only bbox ground-truth),
            # and then convert to x0y0x1y1 format.
            tracker_boxes_t = mask_utils.toBbox(tracker_dets_t)
            tracker_boxes_t[:, 2] = tracker_boxes_t[:, 0] + tracker_boxes_t[:, 2]
            tracker_boxes_t[:, 3] = tracker_boxes_t[:, 1] + tracker_boxes_t[:, 3]
            similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_boxes_t, box_format='x0y0x1y1')
        else:
            similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores
@@ -0,0 +1,81 @@
cls_id_to_name = {
    1: 'person',
    2: 'bicycle',
    3: 'car',
    4: 'motorcycle',
    5: 'airplane',
    6: 'bus',
    7: 'train',
    8: 'truck',
    9: 'boat',
    10: 'traffic light',
    11: 'fire hydrant',
    12: 'stop sign',
    13: 'parking meter',
    14: 'bench',
    15: 'bird',
    16: 'cat',
    17: 'dog',
    18: 'horse',
    19: 'sheep',
    20: 'cow',
    21: 'elephant',
    22: 'bear',
    23: 'zebra',
    24: 'giraffe',
    25: 'backpack',
    26: 'umbrella',
    27: 'handbag',
    28: 'tie',
    29: 'suitcase',
    30: 'frisbee',
    31: 'skis',
    32: 'snowboard',
    33: 'sports ball',
    34: 'kite',
    35: 'baseball bat',
    36: 'baseball glove',
    37: 'skateboard',
    38: 'surfboard',
    39: 'tennis racket',
    40: 'bottle',
    41: 'wine glass',
    42: 'cup',
    43: 'fork',
    44: 'knife',
    45: 'spoon',
    46: 'bowl',
    47: 'banana',
    48: 'apple',
    49: 'sandwich',
    50: 'orange',
    51: 'broccoli',
    52: 'carrot',
    53: 'hot dog',
    54: 'pizza',
    55: 'donut',
    56: 'cake',
    57: 'chair',
    58: 'couch',
    59: 'potted plant',
    60: 'bed',
    61: 'dining table',
    62: 'toilet',
    63: 'tv',
    64: 'laptop',
    65: 'mouse',
    66: 'remote',
    67: 'keyboard',
    68: 'cell phone',
    69: 'microwave',
    70: 'oven',
    71: 'toaster',
    72: 'sink',
    73: 'refrigerator',
    74: 'book',
    75: 'clock',
    76: 'vase',
    77: 'scissors',
    78: 'teddy bear',
    79: 'hair drier',
    80: 'toothbrush'}
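# Note: this appears to be the standard 80-class COCO category mapping (ids 1-80), which the
# RobMOTS code above indexes into for its 'all'-class (class-agnostic) evaluation.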
113
test/yolov7-tracker/tracker/trackeval/datasets/run_rob_mots.py
Normal file
113
test/yolov7-tracker/tracker/trackeval/datasets/run_rob_mots.py
Normal file
@@ -0,0 +1,113 @@

# python3 scripts\run_rob_mots.py --ROBMOTS_SPLIT val --TRACKERS_TO_EVAL tracker_name (e.g. STP) --USE_PARALLEL True --NUM_PARALLEL_CORES 4

import sys
import os
import csv
import numpy as np
from multiprocessing import freeze_support

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import trackeval  # noqa: E402
from trackeval import utils
code_path = utils.get_code_path()

if __name__ == '__main__':
    freeze_support()

    script_config = {
        'ROBMOTS_SPLIT': 'train',  # valid: 'train', 'val', 'test', 'test_live', 'test_post', 'test_all'
        'BENCHMARKS': ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao'],  # 'bdd_mots' coming soon
        'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'),
        'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'),
    }

    default_eval_config = trackeval.Evaluator.get_default_eval_config()
    default_eval_config['PRINT_ONLY_COMBINED'] = True
    default_eval_config['DISPLAY_LESS_PROGRESS'] = True
    default_dataset_config = trackeval.datasets.RobMOTS.get_default_dataset_config()
    config = {**default_eval_config, **default_dataset_config, **script_config}
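    # Note: dict unpacking is left-to-right, so script_config entries override the dataset and
    # eval defaults, and anything passed on the command line overrides all of these below.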

    # Command line interface:
    config = utils.update_config(config)

    if config['ROBMOTS_SPLIT'] == 'val':
        config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
                                'tao', 'mots_challenge']
        config['SPLIT_TO_EVAL'] = 'val'
    elif config['ROBMOTS_SPLIT'] == 'test' or config['SPLIT_TO_EVAL'] == 'test_live':
        config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao']
        config['SPLIT_TO_EVAL'] = 'test'
    elif config['ROBMOTS_SPLIT'] == 'test_post':
        config['BENCHMARKS'] = ['mots_challenge', 'waymo']
        config['SPLIT_TO_EVAL'] = 'test'
    elif config['ROBMOTS_SPLIT'] == 'test_all':
        config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
                                'tao', 'mots_challenge', 'waymo']
        config['SPLIT_TO_EVAL'] = 'test'
    elif config['ROBMOTS_SPLIT'] == 'train':
        config['BENCHMARKS'] = ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao']  # 'bdd_mots' coming soon
        config['SPLIT_TO_EVAL'] = 'train'

    metrics_config = {'METRICS': ['HOTA']}
    # metrics_config = {'METRICS': ['HOTA', 'CLEAR', 'Identity']}
    eval_config = {k: v for k, v in config.items() if k in config.keys()}
    dataset_config = {k: v for k, v in config.items() if k in config.keys()}
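    # Note: both comprehensions are effectively full copies of `config` (the `if k in
    # config.keys()` filter is a no-op); the Evaluator and dataset constructors each pull out
    # only the keys they recognise, so passing the merged dict to both is harmless.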

    # Run code
    dataset_list = []
    for bench in config['BENCHMARKS']:
        dataset_config['SUB_BENCHMARK'] = bench
        dataset_list.append(trackeval.datasets.RobMOTS(dataset_config))
    evaluator = trackeval.Evaluator(eval_config)
    metrics_list = []
    for metric in [trackeval.metrics.HOTA, trackeval.metrics.CLEAR, trackeval.metrics.Identity]:
        if metric.get_name() in metrics_config['METRICS']:
            metrics_list.append(metric())
    if len(metrics_list) == 0:
        raise Exception('No metrics selected for evaluation')
    output_res, output_msg = evaluator.evaluate(dataset_list, metrics_list)

    # For each benchmark, combine the 'all' score with the 'cls_averaged' using geometric mean.
    metrics_to_calc = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA']
    trackers = list(output_res['RobMOTS.' + config['BENCHMARKS'][0]].keys())
    for tracker in trackers:
        # final_results[benchmark][result_type][metric]
        final_results = {}
        res = {bench: output_res['RobMOTS.' + bench][tracker]['COMBINED_SEQ'] for bench in config['BENCHMARKS']}
        for bench in config['BENCHMARKS']:
            final_results[bench] = {'cls_av': {}, 'det_av': {}, 'final': {}}
            for metric in metrics_to_calc:
                final_results[bench]['cls_av'][metric] = np.mean(res[bench]['cls_comb_cls_av']['HOTA'][metric])
                final_results[bench]['det_av'][metric] = np.mean(res[bench]['all']['HOTA'][metric])
                final_results[bench]['final'][metric] = \
                    np.sqrt(final_results[bench]['cls_av'][metric] * final_results[bench]['det_av'][metric])
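                # Note: the per-benchmark score is the geometric mean of the class-averaged
                # result ('cls_av') and the class-agnostic 'all' result ('det_av'), so a tracker
                # has to do well on both to score well.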

        # Take the arithmetic mean over all the benchmarks
        final_results['overall'] = {'cls_av': {}, 'det_av': {}, 'final': {}}
        for metric in metrics_to_calc:
            final_results['overall']['cls_av'][metric] = \
                np.mean([final_results[bench]['cls_av'][metric] for bench in config['BENCHMARKS']])
            final_results['overall']['det_av'][metric] = \
                np.mean([final_results[bench]['det_av'][metric] for bench in config['BENCHMARKS']])
            final_results['overall']['final'][metric] = \
                np.mean([final_results[bench]['final'][metric] for bench in config['BENCHMARKS']])

        # Save out result
        headers = [config['SPLIT_TO_EVAL']] + [x + '___' + metric for x in ['f', 'c', 'd'] for metric in metrics_to_calc]
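        # Note: the 'f'/'c'/'d' column prefixes line up with rowify() below, i.e. the 'final',
        # 'cls_av' and 'det_av' variants of each metric.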

        def rowify(d):
            return [d[x][metric] for x in ['final', 'cls_av', 'det_av'] for metric in metrics_to_calc]

        out_file = os.path.join(script_config['TRACKERS_FOLDER'], script_config['ROBMOTS_SPLIT'], tracker,
                                'final_results.csv')

        with open(out_file, 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(headers)
            writer.writerow(['overall'] + rowify(final_results['overall']))
            for bench in config['BENCHMARKS']:
                if bench == 'overall':
                    continue
                writer.writerow([bench] + rowify(final_results[bench]))
566
test/yolov7-tracker/tracker/trackeval/datasets/tao.py
Normal file
566
test/yolov7-tracker/tracker/trackeval/datasets/tao.py
Normal file
@@ -0,0 +1,566 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing


class TAO(_BaseDataset):
    """Dataset class for TAO tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': None,  # Classes to eval (if None, all classes)
            'SPLIT_TO_EVAL': 'training',  # Valid: 'training', 'val'
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'MAX_DETECTIONS': 300,  # Number of maximal allowed detections per image (0 for unlimited)
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = True
        self.use_super_categories = False

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
        if len(gt_dir_files) != 1:
            raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')

        with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
            self.gt_data = json.load(f)

        # merge categories marked with a merged tag in TAO dataset
        self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])

        # Get sequences to eval and sequence information
        self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
        self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
        # compute mappings from videos to annotation data
        self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
        # compute sequence lengths
        self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
        for img in self.gt_data['images']:
            self.seq_lengths[img['video_id']] += 1
        self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
        self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
                                                                in self.videos_to_gt_tracks[vid['id']]}),
                                           'neg_cat_ids': vid['neg_category_ids'],
                                           'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
                               for vid in self.gt_data['videos']}

        # Get classes to eval
        considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
        seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
                         in self.seq_to_classes[vid_id]['pos_cat_ids']])
        # only classes with ground truth are evaluated in TAO
        self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
        cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}

        if self.config['CLASSES_TO_EVAL']:
            self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
                               for cls in self.config['CLASSES_TO_EVAL']]
            if not all(self.class_list):
                raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
                                         ', '.join(self.valid_classes) +
                                         ' are valid (classes present in ground truth data).')
        else:
            self.class_list = [cls for cls in self.valid_classes]
        self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}

        # Get trackers to eval
        if self.config['TRACKERS_TO_EVAL'] is None:
            self.tracker_list = os.listdir(self.tracker_fol)
        else:
            self.tracker_list = self.config['TRACKERS_TO_EVAL']

        if self.config['TRACKER_DISPLAY_NAMES'] is None:
            self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
        elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
                len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
            self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
        else:
            raise TrackEvalException('List of tracker files and tracker display names do not match.')

        self.tracker_data = {tracker: dict() for tracker in self.tracker_list}

        for tracker in self.tracker_list:
            tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
                            if file.endswith('.json')]
            if len(tr_dir_files) != 1:
                raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
                                         + ' does not contain exactly one json file.')
            with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
                curr_data = json.load(f)

            # limit detections if MAX_DETECTIONS > 0
            if self.config['MAX_DETECTIONS']:
                curr_data = self._limit_dets_per_image(curr_data)

            # fill missing video ids
            self._fill_video_ids_inplace(curr_data)

            # make track ids unique over whole evaluation set
            self._make_track_ids_unique(curr_data)

            # merge categories marked with a merged tag in TAO dataset
            self._merge_categories(curr_data)

            # get tracker sequence information
            curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
            self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
            self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images

    def get_display_name(self, tracker):
        return self.tracker_to_disp[tracker]

    def _load_raw_file(self, tracker, seq, is_gt):
        """Load a file (gt or tracker) in the TAO format

        If is_gt, this returns a dict which contains the fields:
        [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
        [gt_dets]: list (for each timestep) of lists of detections.
        [classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
                                as keys and lists (for each track) as values

        if not is_gt, this returns a dict which contains the fields:
        [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
        [tracker_dets]: list (for each timestep) of lists of detections.
        [classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
                                keys and corresponding segmentations as values) for each track
        [classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
                                as keys and lists as values
        [classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
        """
        seq_id = self.seq_name_to_seq_id[seq]
        # File location
        if is_gt:
            imgs = self.videos_to_gt_images[seq_id]
        else:
            imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]

        # Convert data to required format
        num_timesteps = self.seq_lengths[seq_id]
        img_to_timestep = self.seq_to_images_to_timestep[seq_id]
        data_keys = ['ids', 'classes', 'dets']
        if not is_gt:
            data_keys += ['tracker_confidences']
        raw_data = {key: [None] * num_timesteps for key in data_keys}
        for img in imgs:
            # some tracker data contains images without any ground truth information, these are ignored
            try:
                t = img_to_timestep[img['id']]
            except KeyError:
                continue
            annotations = img['annotations']
            raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
            raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
            raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
            if not is_gt:
                raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)

        for t, d in enumerate(raw_data['dets']):
            if d is None:
                raw_data['dets'][t] = np.empty((0, 4)).astype(float)
                raw_data['ids'][t] = np.empty(0).astype(int)
                raw_data['classes'][t] = np.empty(0).astype(int)
                if not is_gt:
                    raw_data['tracker_confidences'][t] = np.empty(0)

        if is_gt:
            key_map = {'ids': 'gt_ids',
                       'classes': 'gt_classes',
                       'dets': 'gt_dets'}
        else:
            key_map = {'ids': 'tracker_ids',
                       'classes': 'tracker_classes',
                       'dets': 'tracker_dets'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
        if is_gt:
            classes_to_consider = all_classes
            all_tracks = self.videos_to_gt_tracks[seq_id]
        else:
            classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
                                  + self.seq_to_classes[seq_id]['neg_cat_ids']
            all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]

        classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
                             if cls in classes_to_consider else [] for cls in all_classes}

        # mapping from classes to track information
        raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
                                                for det in track['annotations']} for track in tracks]
                                         for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
                                            for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
                                              for cls, tracks in classes_to_tracks.items()}
        raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
                                                for cls, tracks in classes_to_tracks.items()}

        if not is_gt:
            raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
                                                                              for x in track['annotations']])
                                                                     for track in tracks])
                                                      for cls, tracks in classes_to_tracks.items()}
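        # Note: a track-level confidence is the arithmetic mean of the per-detection scores over
        # all of the track's annotations; these are the scores used to sort tracks for TrackMAP
        # further below.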

        if is_gt:
            key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
                       'classes_to_track_ids': 'classes_to_gt_track_ids',
                       'classes_to_track_lengths': 'classes_to_gt_track_lengths',
                       'classes_to_track_areas': 'classes_to_gt_track_areas'}
        else:
            key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
                       'classes_to_track_ids': 'classes_to_dt_track_ids',
                       'classes_to_track_lengths': 'classes_to_dt_track_lengths',
                       'classes_to_track_areas': 'classes_to_dt_track_areas'}
        for k, v in key_map.items():
            raw_data[v] = raw_data.pop(k)

        raw_data['num_timesteps'] = num_timesteps
        raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
        raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
        raw_data['seq'] = seq
        return raw_data

    @_timing.time
    def get_preprocessed_seq_data(self, raw_data, cls):
        """ Preprocess data for a single sequence for a single class ready for evaluation.
        Inputs:
            - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
            - cls is the class to be evaluated.
        Outputs:
            - data is a dict containing all of the information that metrics need to perform evaluation.
                It contains the following fields:
                    [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
                    [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
                    [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
                    [similarity_scores]: list (for each timestep) of 2D NDArrays.
        Notes:
            General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
                1) Extract only detections relevant for the class to be evaluated (including distractor detections).
                2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
                    distractor class, or otherwise marked as to be removed.
                3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
                    other criteria (e.g. are too small).
                4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
            After the above preprocessing steps, this function also calculates the number of gt and tracker detections
            and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
            unique within each timestep.
        TAO:
            In TAO, the 4 preproc steps are as follows:
                1) All classes present in the ground truth data are evaluated separately.
                2) No matched tracker detections are removed.
                3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
                    belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
                    detections for classes which are marked as not exhaustively labeled are removed.
                4) No gt detections are removed.
            Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
            and the tracks from the tracker data are sorted according to the tracker confidence.
        """
        cls_id = self.class_name_to_class_id[cls]
        is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
        is_neg_category = cls_id in raw_data['neg_cat_ids']

        data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
        data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
        unique_gt_ids = []
        unique_tracker_ids = []
        num_gt_dets = 0
        num_tracker_dets = 0
        for t in range(raw_data['num_timesteps']):

            # Only extract relevant dets for this class for preproc and eval (cls)
            gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
            gt_class_mask = gt_class_mask.astype(bool)
            gt_ids = raw_data['gt_ids'][t][gt_class_mask]
            gt_dets = raw_data['gt_dets'][t][gt_class_mask]

            tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
            tracker_class_mask = tracker_class_mask.astype(bool)
            tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
            tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
            tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
            similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]

            # Match tracker and gt dets (with hungarian algorithm).
            unmatched_indices = np.arange(tracker_ids.shape[0])
            if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
                matching_scores = similarity_scores.copy()
                matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
                match_rows, match_cols = linear_sum_assignment(-matching_scores)
                actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
                match_cols = match_cols[actually_matched_mask]
                unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)

            if gt_ids.shape[0] == 0 and not is_neg_category:
                to_remove_tracker = unmatched_indices
            elif is_not_exhaustively_labeled:
                to_remove_tracker = unmatched_indices
            else:
                to_remove_tracker = np.array([], dtype=int)

            # remove all unwanted unmatched tracker detections
            data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
            data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
            data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
            similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)

            data['gt_ids'][t] = gt_ids
            data['gt_dets'][t] = gt_dets
            data['similarity_scores'][t] = similarity_scores

            unique_gt_ids += list(np.unique(data['gt_ids'][t]))
            unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
            num_tracker_dets += len(data['tracker_ids'][t])
            num_gt_dets += len(data['gt_ids'][t])

        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # get track representations
        data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
        data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
        data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
        data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
        data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
        data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
        data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
        data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
        data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
        data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
        data['iou_type'] = 'bbox'

        # sort tracker data tracks by tracker confidence scores
        if data['dt_tracks']:
            idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
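            # Note: negating the scores combined with a stable mergesort yields a descending
            # order in which ties keep their original relative position, so repeated evaluations
            # are deterministic.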
            data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
            data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
            data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
            data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
            data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
        return similarity_scores

    def _merge_categories(self, annotations):
        """
        Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
        :param annotations: the annotations in which the classes should be merged
        :return: None
        """
        merge_map = {}
        for category in self.gt_data['categories']:
            if 'merged' in category:
                for to_merge in category['merged']:
                    merge_map[to_merge['id']] = category['id']

        for ann in annotations:
            ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])

    def _compute_vid_mappings(self, annotations):
        """
        Computes mappings from videos to corresponding tracks and images.
        :param annotations: the annotations for which the mapping should be generated
        :return: the video-to-track-mapping, the video-to-image-mapping
        """
        vids_to_tracks = {}
        vids_to_imgs = {}
        vid_ids = [vid['id'] for vid in self.gt_data['videos']]

        # compute a mapping from image IDs to images
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        for ann in annotations:
            ann["area"] = ann["bbox"][2] * ann["bbox"][3]

            vid = ann["video_id"]
            if ann["video_id"] not in vids_to_tracks.keys():
                vids_to_tracks[ann["video_id"]] = list()
            if ann["video_id"] not in vids_to_imgs.keys():
                vids_to_imgs[ann["video_id"]] = list()

            # Fill in vids_to_tracks
            tid = ann["track_id"]
            exist_tids = [track["id"] for track in vids_to_tracks[vid]]
            try:
                index1 = exist_tids.index(tid)
            except ValueError:
                index1 = -1
            if tid not in exist_tids:
                curr_track = {"id": tid, "category_id": ann['category_id'],
                              "video_id": vid, "annotations": [ann]}
                vids_to_tracks[vid].append(curr_track)
            else:
                vids_to_tracks[vid][index1]["annotations"].append(ann)

            # Fill in vids_to_imgs
            img_id = ann['image_id']
            exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
            try:
                index2 = exist_img_ids.index(img_id)
            except ValueError:
                index2 = -1
            if index2 == -1:
                curr_img = {"id": img_id, "annotations": [ann]}
                vids_to_imgs[vid].append(curr_img)
            else:
                vids_to_imgs[vid][index2]["annotations"].append(ann)

        # sort annotations by frame index and compute track area
        for vid, tracks in vids_to_tracks.items():
            for track in tracks:
                track["annotations"] = sorted(
                    track['annotations'],
                    key=lambda x: images[x['image_id']]['frame_index'])
                # Compute average area
                track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))

        # Ensure all videos are present
        for vid_id in vid_ids:
            if vid_id not in vids_to_tracks.keys():
                vids_to_tracks[vid_id] = []
            if vid_id not in vids_to_imgs.keys():
                vids_to_imgs[vid_id] = []

        return vids_to_tracks, vids_to_imgs

    def _compute_image_to_timestep_mappings(self):
        """
        Computes a mapping from images to the corresponding timestep in the sequence.
        :return: the image-to-timestep-mapping
        """
        images = {}
        for image in self.gt_data['images']:
            images[image['id']] = image

        seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
        for vid in seq_to_imgs_to_timestep:
            curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
            curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
            seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}

        return seq_to_imgs_to_timestep

    def _limit_dets_per_image(self, annotations):
        """
        Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
        https://github.com/TAO-Dataset/
        :param annotations: the annotations in which the detections should be limited
        :return: the annotations with limited detections
        """
        max_dets = self.config['MAX_DETECTIONS']
        img_ann = defaultdict(list)
        for ann in annotations:
            img_ann[ann["image_id"]].append(ann)

        for img_id, _anns in img_ann.items():
            if len(_anns) <= max_dets:
                continue
            _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
            img_ann[img_id] = _anns[:max_dets]

        return [ann for anns in img_ann.values() for ann in anns]

    def _fill_video_ids_inplace(self, annotations):
        """
        Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotations for which the videos IDs should be filled inplace
        :return: None
        """
        missing_video_id = [x for x in annotations if 'video_id' not in x]
        if missing_video_id:
            image_id_to_video_id = {
                x['id']: x['video_id'] for x in self.gt_data['images']
            }
            for x in missing_video_id:
                x['video_id'] = image_id_to_video_id[x['image_id']]

    @staticmethod
    def _make_track_ids_unique(annotations):
        """
        Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
        :param annotations: the annotation set
        :return: the number of updated IDs
        """
        track_id_videos = {}
        track_ids_to_update = set()
        max_track_id = 0
        for ann in annotations:
            t = ann['track_id']
            if t not in track_id_videos:
                track_id_videos[t] = ann['video_id']

            if ann['video_id'] != track_id_videos[t]:
                # Track id is assigned to multiple videos
                track_ids_to_update.add(t)
            max_track_id = max(max_track_id, t)

        if track_ids_to_update:
            print('Reassigning track IDs that appear in multiple videos.')
            next_id = itertools.count(max_track_id + 1)
            new_track_ids = defaultdict(lambda: next(next_id))
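            # Note: the defaultdict is keyed by (track_id, video_id) pairs below, so each
            # clashing track gets one fresh id per video, drawn lazily from the itertools
            # counter starting above the current maximum.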
            for ann in annotations:
                t = ann['track_id']
                v = ann['video_id']
                if t in track_ids_to_update:
                    ann['track_id'] = new_track_ids[t, v]
        return len(track_ids_to_update)
652
test/yolov7-tracker/tracker/trackeval/datasets/tao_ow.py
Normal file
652
test/yolov7-tracker/tracker/trackeval/datasets/tao_ow.py
Normal file
@@ -0,0 +1,652 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing


class TAO_OW(_BaseDataset):
    """Dataset class for open-world TAO tracking"""

    @staticmethod
    def get_default_dataset_config():
        """Default class config values"""
        code_path = utils.get_code_path()
        default_config = {
            'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'),  # Location of GT data
            'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'),  # Trackers location
            'OUTPUT_FOLDER': None,  # Where to save eval results (if None, same as TRACKERS_FOLDER)
            'TRACKERS_TO_EVAL': None,  # Filenames of trackers to eval (if None, all in folder)
            'CLASSES_TO_EVAL': None,  # Classes to eval (if None, all classes)
            'SPLIT_TO_EVAL': 'training',  # Valid: 'training', 'val'
            'PRINT_CONFIG': True,  # Whether to print current config
            'TRACKER_SUB_FOLDER': 'data',  # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
            'OUTPUT_SUB_FOLDER': '',  # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
            'TRACKER_DISPLAY_NAMES': None,  # Names of trackers to display, if None: TRACKERS_TO_EVAL
            'MAX_DETECTIONS': 300,  # Number of maximal allowed detections per image (0 for unlimited)
            'SUBSET': 'all'
        }
        return default_config

    def __init__(self, config=None):
        """Initialise dataset, checking that all required files are present"""
        super().__init__()
        # Fill non-given config values with defaults
        self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
        self.gt_fol = self.config['GT_FOLDER']
        self.tracker_fol = self.config['TRACKERS_FOLDER']
        self.should_classes_combine = True
        self.use_super_categories = False

        self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
        self.output_fol = self.config['OUTPUT_FOLDER']
        if self.output_fol is None:
            self.output_fol = self.tracker_fol
        self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']

        gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
        if len(gt_dir_files) != 1:
            raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')

        with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
            self.gt_data = json.load(f)

        self.subset = self.config['SUBSET']
        if self.subset != 'all':
            # Split GT data into `known`, `unknown` or `distractor`
            self._split_known_unknown_distractor()
            self.gt_data = self._filter_gt_data(self.gt_data)
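        # Note: `_split_known_unknown_distractor` and `_filter_gt_data` (defined further down in
        # this file) partition the TAO categories and keep only the ground truth for the chosen
        # subset, so everything below behaves as if that subset were the whole dataset.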

        # merge categories marked with a merged tag in TAO dataset
        self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])

        # Get sequences to eval and sequence information
        self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
        self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
        # compute mappings from videos to annotation data
        self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
        # compute sequence lengths
        self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
        for img in self.gt_data['images']:
            self.seq_lengths[img['video_id']] += 1
        self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
        self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
                                                                in self.videos_to_gt_tracks[vid['id']]}),
                                           'neg_cat_ids': vid['neg_category_ids'],
                                           'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
                               for vid in self.gt_data['videos']}

        # Get classes to eval
        considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
        seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
                         in self.seq_to_classes[vid_id]['pos_cat_ids']])
        # only classes with ground truth are evaluated in TAO
        self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
        # cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}

        if self.config['CLASSES_TO_EVAL']:
            # self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
            #                    for cls in self.config['CLASSES_TO_EVAL']]
            self.class_list = ["object"]  # class-agnostic
            if not all(self.class_list):
                raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
                                         ', '.join(self.valid_classes) +
                                         ' are valid (classes present in ground truth data).')
        else:
            # self.class_list = [cls for cls in self.valid_classes]
            self.class_list = ["object"]  # class-agnostic
        # self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
        self.class_name_to_class_id = {"object": 1}  # class-agnostic
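        # Note: the open-world variant deliberately collapses every category to a single
        # 'object' class with id 1; the commented-out lines are the class-aware logic kept
        # from the TAO dataset class above for reference.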
|
||||
|
||||
# Get trackers to eval
|
||||
if self.config['TRACKERS_TO_EVAL'] is None:
|
||||
self.tracker_list = os.listdir(self.tracker_fol)
|
||||
else:
|
||||
self.tracker_list = self.config['TRACKERS_TO_EVAL']
|
||||
|
||||
if self.config['TRACKER_DISPLAY_NAMES'] is None:
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
|
||||
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
|
||||
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
|
||||
else:
|
||||
raise TrackEvalException('List of tracker files and tracker display names do not match.')
|
||||
|
||||
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
|
||||
|
||||
for tracker in self.tracker_list:
|
||||
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
|
||||
if file.endswith('.json')]
|
||||
if len(tr_dir_files) != 1:
|
||||
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
|
||||
+ ' does not contain exactly one json file.')
|
||||
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
|
||||
curr_data = json.load(f)
|
||||
|
||||
# limit detections if MAX_DETECTIONS > 0
|
||||
if self.config['MAX_DETECTIONS']:
|
||||
curr_data = self._limit_dets_per_image(curr_data)
|
||||
|
||||
# fill missing video ids
|
||||
self._fill_video_ids_inplace(curr_data)
|
||||
|
||||
# make track ids unique over whole evaluation set
|
||||
self._make_track_ids_unique(curr_data)
|
||||
|
||||
# merge categories marked with a merged tag in TAO dataset
|
||||
self._merge_categories(curr_data)
|
||||
|
||||
# get tracker sequence information
|
||||
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
|
||||
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
|
||||
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
|
||||
|
||||
def get_display_name(self, tracker):
|
||||
return self.tracker_to_disp[tracker]
|
||||
|
||||
def _load_raw_file(self, tracker, seq, is_gt):
|
||||
"""Load a file (gt or tracker) in the TAO format
|
||||
|
||||
If is_gt, this returns a dict which contains the fields:
|
||||
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets]: list (for each timestep) of lists of detections.
|
||||
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
|
||||
keys and corresponding segmentations as values) for each track
|
||||
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
|
||||
as keys and lists (for each track) as values
|
||||
|
||||
if not is_gt, this returns a dict which contains the fields:
|
||||
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[tracker_dets]: list (for each timestep) of lists of detections.
|
||||
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
|
||||
keys and corresponding segmentations as values) for each track
|
||||
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
|
||||
as keys and lists as values
|
||||
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
|
||||
"""
|
||||
seq_id = self.seq_name_to_seq_id[seq]
|
||||
# File location
|
||||
if is_gt:
|
||||
imgs = self.videos_to_gt_images[seq_id]
|
||||
else:
|
||||
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
|
||||
|
||||
# Convert data to required format
|
||||
num_timesteps = self.seq_lengths[seq_id]
|
||||
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
|
||||
data_keys = ['ids', 'classes', 'dets']
|
||||
if not is_gt:
|
||||
data_keys += ['tracker_confidences']
|
||||
raw_data = {key: [None] * num_timesteps for key in data_keys}
|
||||
for img in imgs:
|
||||
# some tracker data contains images without any ground truth information; these are ignored
|
||||
try:
|
||||
t = img_to_timestep[img['id']]
|
||||
except KeyError:
|
||||
continue
|
||||
annotations = img['annotations']
|
||||
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
|
||||
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
|
||||
raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int) # class-agnostic
|
||||
if not is_gt:
|
||||
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
|
||||
|
||||
for t, d in enumerate(raw_data['dets']):
|
||||
if d is None:
|
||||
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
|
||||
raw_data['ids'][t] = np.empty(0).astype(int)
|
||||
raw_data['classes'][t] = np.empty(0).astype(int)
|
||||
if not is_gt:
|
||||
raw_data['tracker_confidences'][t] = np.empty(0)
|
||||
|
||||
if is_gt:
|
||||
key_map = {'ids': 'gt_ids',
|
||||
'classes': 'gt_classes',
|
||||
'dets': 'gt_dets'}
|
||||
else:
|
||||
key_map = {'ids': 'tracker_ids',
|
||||
'classes': 'tracker_classes',
|
||||
'dets': 'tracker_dets'}
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
|
||||
# all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
|
||||
all_classes = [1] # class-agnostic
|
||||
|
||||
if is_gt:
|
||||
classes_to_consider = all_classes
|
||||
all_tracks = self.videos_to_gt_tracks[seq_id]
|
||||
else:
|
||||
# classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
|
||||
# + self.seq_to_classes[seq_id]['neg_cat_ids']
|
||||
classes_to_consider = all_classes # class-agnostic
|
||||
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
|
||||
|
||||
# classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
|
||||
# if cls in classes_to_consider else [] for cls in all_classes}
|
||||
classes_to_tracks = {cls: [track for track in all_tracks]
|
||||
if cls in classes_to_consider else [] for cls in all_classes} # class-agnostic
|
||||
|
||||
# mapping from classes to track information
|
||||
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
|
||||
for det in track['annotations']} for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
|
||||
if not is_gt:
|
||||
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
|
||||
for x in track['annotations']])
|
||||
for track in tracks])
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
|
||||
if is_gt:
|
||||
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
|
||||
'classes_to_track_ids': 'classes_to_gt_track_ids',
|
||||
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
|
||||
'classes_to_track_areas': 'classes_to_gt_track_areas'}
|
||||
else:
|
||||
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
|
||||
'classes_to_track_ids': 'classes_to_dt_track_ids',
|
||||
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
|
||||
'classes_to_track_areas': 'classes_to_dt_track_areas'}
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
|
||||
raw_data['num_timesteps'] = num_timesteps
|
||||
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
|
||||
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
|
||||
raw_data['seq'] = seq
|
||||
return raw_data
|
||||
|
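For orientation, here is a minimal, purely illustrative sketch of the class-agnostic structure this loader returns, for a hypothetical two-frame sequence (all ids, image ids and boxes are invented):

import numpy as np

raw_data_example = {
    'gt_ids': [np.array([1, 2]), np.array([1])],               # one 1D array per timestep
    'gt_classes': [np.array([1, 1]), np.array([1])],           # always 1: class-agnostic
    'gt_dets': [np.array([[0., 0., 10., 20.], [5., 5., 8., 8.]]),
                np.array([[1., 1., 10., 20.]])],               # per-timestep [x, y, w, h]
    'classes_to_gt_tracks': {1: [{100: np.array([0., 0., 10., 20.]),
                                  101: np.array([1., 1., 10., 20.])}]},  # image_id -> bbox
    'classes_to_gt_track_ids': {1: [1]},
    'num_timesteps': 2,
    'seq': 'example_seq',
}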
||||
@_timing.time
|
||||
def get_preprocessed_seq_data(self, raw_data, cls):
|
||||
""" Preprocess data for a single sequence for a single class ready for evaluation.
|
||||
Inputs:
|
||||
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
|
||||
- cls is the class to be evaluated.
|
||||
Outputs:
|
||||
- data is a dict containing all of the information that metrics need to perform evaluation.
|
||||
It contains the following fields:
|
||||
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
|
||||
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
|
||||
[similarity_scores]: list (for each timestep) of 2D NDArrays.
|
||||
Notes:
|
||||
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
|
||||
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
|
||||
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
|
||||
distractor class, or otherwise marked as to be removed.
|
||||
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
|
||||
other criteria (e.g. are too small).
|
||||
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
|
||||
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
|
||||
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
|
||||
unique within each timestep.
|
||||
TAO:
|
||||
In TAO, the 4 preproc steps are as follows:
|
||||
1) All classes present in the ground truth data are evaluated separately.
|
||||
2) No matched tracker detections are removed.
|
||||
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
|
||||
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
|
||||
detections for classes which are marked as not exhaustively labeled are removed.
|
||||
4) No gt detections are removed.
|
||||
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
|
||||
and the tracks from the tracker data are sorted according to the tracker confidence.
|
||||
"""
|
||||
cls_id = self.class_name_to_class_id[cls]
|
||||
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
|
||||
is_neg_category = cls_id in raw_data['neg_cat_ids']
|
||||
|
||||
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
|
||||
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
|
||||
unique_gt_ids = []
|
||||
unique_tracker_ids = []
|
||||
num_gt_dets = 0
|
||||
num_tracker_dets = 0
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
|
||||
# Only extract relevant dets for this class for preproc and eval (cls)
|
||||
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
|
||||
gt_class_mask = gt_class_mask.astype(bool)
|
||||
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
|
||||
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
|
||||
|
||||
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
|
||||
tracker_class_mask = tracker_class_mask.astype(bool)
|
||||
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
|
||||
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
|
||||
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
|
||||
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
|
||||
|
||||
# Match tracker and gt dets (with the Hungarian algorithm).
|
||||
unmatched_indices = np.arange(tracker_ids.shape[0])
|
||||
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
|
||||
matching_scores = similarity_scores.copy()
|
||||
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
|
||||
match_rows, match_cols = linear_sum_assignment(-matching_scores)
|
||||
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
|
||||
match_cols = match_cols[actually_matched_mask]
|
||||
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
|
||||
|
||||
if gt_ids.shape[0] == 0 and not is_neg_category:
|
||||
to_remove_tracker = unmatched_indices
|
||||
elif is_not_exhaustively_labeled:
|
||||
to_remove_tracker = unmatched_indices
|
||||
else:
|
||||
to_remove_tracker = np.array([], dtype=int)
|
||||
|
||||
# remove all unwanted unmatched tracker detections
|
||||
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
|
||||
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
|
||||
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
|
||||
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
|
||||
|
||||
data['gt_ids'][t] = gt_ids
|
||||
data['gt_dets'][t] = gt_dets
|
||||
data['similarity_scores'][t] = similarity_scores
|
||||
|
||||
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
|
||||
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
|
||||
num_tracker_dets += len(data['tracker_ids'][t])
|
||||
num_gt_dets += len(data['gt_ids'][t])
|
||||
|
||||
# Re-label IDs such that there are no empty IDs
|
||||
if len(unique_gt_ids) > 0:
|
||||
unique_gt_ids = np.unique(unique_gt_ids)
|
||||
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
|
||||
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
if len(data['gt_ids'][t]) > 0:
|
||||
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
|
||||
if len(unique_tracker_ids) > 0:
|
||||
unique_tracker_ids = np.unique(unique_tracker_ids)
|
||||
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
|
||||
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
if len(data['tracker_ids'][t]) > 0:
|
||||
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
|
||||
|
||||
# Record overview statistics.
|
||||
data['num_tracker_dets'] = num_tracker_dets
|
||||
data['num_gt_dets'] = num_gt_dets
|
||||
data['num_tracker_ids'] = len(unique_tracker_ids)
|
||||
data['num_gt_ids'] = len(unique_gt_ids)
|
||||
data['num_timesteps'] = raw_data['num_timesteps']
|
||||
data['seq'] = raw_data['seq']
|
||||
|
||||
# get track representations
|
||||
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
|
||||
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
|
||||
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
|
||||
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
|
||||
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
|
||||
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
|
||||
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
|
||||
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
|
||||
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
|
||||
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
|
||||
data['iou_type'] = 'bbox'
|
||||
|
||||
# sort tracker data tracks by tracker confidence scores
|
||||
if data['dt_tracks']:
|
||||
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
|
||||
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
|
||||
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
|
||||
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
|
||||
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
|
||||
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
|
||||
# Ensure that ids are unique per timestep.
|
||||
self._check_unique_ids(data)
|
||||
|
||||
return data
|
||||
|
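To make the matching step above concrete, a small self-contained sketch (similarity values hypothetical) of the threshold-then-Hungarian pattern used in this method: scores below 0.5 are zeroed so they can never produce a match, and the assignment then maximises total similarity.

import numpy as np
from scipy.optimize import linear_sum_assignment

similarity = np.array([[0.9, 0.2],
                       [0.4, 0.6]])                                  # rows: gt, cols: tracker
matching_scores = similarity.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0   # sub-0.5 IoUs can never match
match_rows, match_cols = linear_sum_assignment(-matching_scores)     # maximise total similarity
actually_matched = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows, match_cols = match_rows[actually_matched], match_cols[actually_matched]
print(match_rows, match_cols)   # [0 1] [0 1]: gt 0 <-> tracker 0, gt 1 <-> tracker 1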
||||
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
|
||||
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
|
||||
return similarity_scores
|
||||
|
||||
def _merge_categories(self, annotations):
|
||||
"""
|
||||
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
|
||||
:param annotations: the annotations in which the classes should be merged
|
||||
:return: None
|
||||
"""
|
||||
merge_map = {}
|
||||
for category in self.gt_data['categories']:
|
||||
if 'merged' in category:
|
||||
for to_merge in category['merged']:
|
||||
merge_map[to_merge['id']] = category['id']
|
||||
|
||||
for ann in annotations:
|
||||
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
|
||||
|
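A tiny sketch of the remapping this performs (category ids hypothetical): ids listed under a 'merged' tag are redirected, everything else passes through unchanged.

merge_map = {42: 7}                      # category 42 was merged into category 7
anns = [{'category_id': 42}, {'category_id': 3}]
for ann in anns:
    ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
print(anns)                              # [{'category_id': 7}, {'category_id': 3}]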
||||
def _compute_vid_mappings(self, annotations):
|
||||
"""
|
||||
Computes mappings from videos to their corresponding tracks and images.
|
||||
:param annotations: the annotations for which the mapping should be generated
|
||||
:return: the video-to-track-mapping, the video-to-image-mapping
|
||||
"""
|
||||
vids_to_tracks = {}
|
||||
vids_to_imgs = {}
|
||||
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
|
||||
|
||||
# compute a mapping from image IDs to images
|
||||
images = {}
|
||||
for image in self.gt_data['images']:
|
||||
images[image['id']] = image
|
||||
|
||||
for ann in annotations:
|
||||
ann["area"] = ann["bbox"][2] * ann["bbox"][3]
|
||||
|
||||
vid = ann["video_id"]
|
||||
if ann["video_id"] not in vids_to_tracks.keys():
|
||||
vids_to_tracks[ann["video_id"]] = list()
|
||||
if ann["video_id"] not in vids_to_imgs.keys():
|
||||
vids_to_imgs[ann["video_id"]] = list()
|
||||
|
||||
# Fill in vids_to_tracks
|
||||
tid = ann["track_id"]
|
||||
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
|
||||
try:
|
||||
index1 = exist_tids.index(tid)
|
||||
except ValueError:
|
||||
index1 = -1
|
||||
if index1 == -1:
|
||||
curr_track = {"id": tid, "category_id": ann['category_id'],
|
||||
"video_id": vid, "annotations": [ann]}
|
||||
vids_to_tracks[vid].append(curr_track)
|
||||
else:
|
||||
vids_to_tracks[vid][index1]["annotations"].append(ann)
|
||||
|
||||
# Fill in vids_to_imgs
|
||||
img_id = ann['image_id']
|
||||
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
|
||||
try:
|
||||
index2 = exist_img_ids.index(img_id)
|
||||
except ValueError:
|
||||
index2 = -1
|
||||
if index2 == -1:
|
||||
curr_img = {"id": img_id, "annotations": [ann]}
|
||||
vids_to_imgs[vid].append(curr_img)
|
||||
else:
|
||||
vids_to_imgs[vid][index2]["annotations"].append(ann)
|
||||
|
||||
# sort annotations by frame index and compute track area
|
||||
for vid, tracks in vids_to_tracks.items():
|
||||
for track in tracks:
|
||||
track["annotations"] = sorted(
|
||||
track['annotations'],
|
||||
key=lambda x: images[x['image_id']]['frame_index'])
|
||||
# Compute the average track area
|
||||
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
|
||||
|
||||
# Ensure all videos are present
|
||||
for vid_id in vid_ids:
|
||||
if vid_id not in vids_to_tracks.keys():
|
||||
vids_to_tracks[vid_id] = []
|
||||
if vid_id not in vids_to_imgs.keys():
|
||||
vids_to_imgs[vid_id] = []
|
||||
|
||||
return vids_to_tracks, vids_to_imgs
|
||||
|
||||
def _compute_image_to_timestep_mappings(self):
|
||||
"""
|
||||
Computes a mapping from images to the corresponding timestep in the sequence.
|
||||
:return: the image-to-timestep-mapping
|
||||
"""
|
||||
images = {}
|
||||
for image in self.gt_data['images']:
|
||||
images[image['id']] = image
|
||||
|
||||
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
|
||||
for vid in seq_to_imgs_to_timestep:
|
||||
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
|
||||
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
|
||||
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
|
||||
|
||||
return seq_to_imgs_to_timestep
|
||||
|
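A short sketch of the mapping computed above (image ids and frame indices hypothetical): image ids are ordered by frame_index, and each id's position in that order becomes its timestep.

images = {10: {'frame_index': 2}, 11: {'frame_index': 0}, 12: {'frame_index': 1}}
curr_imgs = sorted(images, key=lambda img_id: images[img_id]['frame_index'])
imgs_to_timestep = {img_id: t for t, img_id in enumerate(curr_imgs)}
print(imgs_to_timestep)   # {11: 0, 12: 1, 10: 2}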
||||
def _limit_dets_per_image(self, annotations):
|
||||
"""
|
||||
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
|
||||
https://github.com/TAO-Dataset/
|
||||
:param annotations: the annotations in which the detections should be limited
|
||||
:return: the annotations with limited detections
|
||||
"""
|
||||
max_dets = self.config['MAX_DETECTIONS']
|
||||
img_ann = defaultdict(list)
|
||||
for ann in annotations:
|
||||
img_ann[ann["image_id"]].append(ann)
|
||||
|
||||
for img_id, _anns in img_ann.items():
|
||||
if len(_anns) <= max_dets:
|
||||
continue
|
||||
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
|
||||
img_ann[img_id] = _anns[:max_dets]
|
||||
|
||||
return [ann for anns in img_ann.values() for ann in anns]
|
||||
|
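A quick sketch of the per-image capping above (ids and scores hypothetical): only the max_dets highest-scoring detections survive for each image.

from collections import defaultdict

anns = [{'image_id': 1, 'score': 0.9}, {'image_id': 1, 'score': 0.2},
        {'image_id': 1, 'score': 0.7}]
max_dets = 2
img_ann = defaultdict(list)
for ann in anns:
    img_ann[ann['image_id']].append(ann)
for img_id, _anns in img_ann.items():
    if len(_anns) > max_dets:
        img_ann[img_id] = sorted(_anns, key=lambda x: x['score'], reverse=True)[:max_dets]
print([a['score'] for a in img_ann[1]])   # [0.9, 0.7]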
||||
def _fill_video_ids_inplace(self, annotations):
|
||||
"""
|
||||
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
|
||||
:param annotations: the annotations for which the video IDs should be filled in place
|
||||
:return: None
|
||||
"""
|
||||
missing_video_id = [x for x in annotations if 'video_id' not in x]
|
||||
if missing_video_id:
|
||||
image_id_to_video_id = {
|
||||
x['id']: x['video_id'] for x in self.gt_data['images']
|
||||
}
|
||||
for x in missing_video_id:
|
||||
x['video_id'] = image_id_to_video_id[x['image_id']]
|
||||
|
||||
@staticmethod
|
||||
def _make_track_ids_unique(annotations):
|
||||
"""
|
||||
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
|
||||
:param annotations: the annotation set
|
||||
:return: the number of updated IDs
|
||||
"""
|
||||
track_id_videos = {}
|
||||
track_ids_to_update = set()
|
||||
max_track_id = 0
|
||||
for ann in annotations:
|
||||
t = ann['track_id']
|
||||
if t not in track_id_videos:
|
||||
track_id_videos[t] = ann['video_id']
|
||||
|
||||
if ann['video_id'] != track_id_videos[t]:
|
||||
# Track id is assigned to multiple videos
|
||||
track_ids_to_update.add(t)
|
||||
max_track_id = max(max_track_id, t)
|
||||
|
||||
if track_ids_to_update:
|
||||
next_id = itertools.count(max_track_id + 1)
|
||||
new_track_ids = defaultdict(lambda: next(next_id))
|
||||
for ann in annotations:
|
||||
t = ann['track_id']
|
||||
v = ann['video_id']
|
||||
if t in track_ids_to_update:
|
||||
ann['track_id'] = new_track_ids[t, v]
|
||||
return len(track_ids_to_update)
|
||||
|
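The remapping above leans on a defaultdict whose factory draws from a shared counter; a minimal sketch (ids hypothetical) of why each (track_id, video_id) pair gets one stable, globally unique id:

import itertools
from collections import defaultdict

next_id = itertools.count(101)                # 101 = max existing track id + 1
new_track_ids = defaultdict(lambda: next(next_id))
print(new_track_ids[(7, 'vid_a')])            # 101: first lookup allocates a fresh id
print(new_track_ids[(7, 'vid_b')])            # 102: same track id, different video
print(new_track_ids[(7, 'vid_a')])            # 101 again: the mapping is stable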
||||
def _split_known_unknown_distractor(self):
|
||||
all_ids = set(range(1, 2000))  # 2000 is larger than the max category id in TAO-OW.
|
||||
# `knowns` includes 78 TAO_category_ids that correspond to 78 COCO classes.
|
||||
# (The other 2 COCO classes do not have corresponding classes in TAO).
|
||||
self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
|
||||
1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
|
||||
699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
|
||||
371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
|
||||
# `distractors` is defined as in the paper "Opening up Open-World Tracking"
|
||||
self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
|
||||
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
|
||||
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
|
||||
self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
|
||||
|
||||
def _filter_gt_data(self, raw_gt_data):
|
||||
"""
|
||||
Filter out irrelevant data in the raw_gt_data
|
||||
Args:
|
||||
raw_gt_data: directly loaded from json.
|
||||
|
||||
Returns:
|
||||
filtered gt_data
|
||||
"""
|
||||
valid_cat_ids = list()
|
||||
if self.subset == "known":
|
||||
valid_cat_ids = self.knowns
|
||||
elif self.subset == "distractor":
|
||||
valid_cat_ids = self.distractors
|
||||
elif self.subset == "unknown":
|
||||
valid_cat_ids = self.unknowns
|
||||
# elif self.subset == "test_only_unknowns":
|
||||
# valid_cat_ids = test_only_unknowns
|
||||
else:
|
||||
raise TrackEvalException("Invalid `SUBSET` parameter: expected 'known', 'distractor' or 'unknown'.")
|
||||
|
||||
filtered = dict()
|
||||
filtered["videos"] = raw_gt_data["videos"]
|
||||
# filtered["videos"] = list()
|
||||
unwanted_vid = set()
|
||||
# for video in raw_gt_data["videos"]:
|
||||
# datasrc = video["name"].split('/')[1]
|
||||
# if datasrc in data_srcs:
|
||||
# filtered["videos"].append(video)
|
||||
# else:
|
||||
# unwanted_vid.add(video["id"])
|
||||
|
||||
filtered["annotations"] = list()
|
||||
for ann in raw_gt_data["annotations"]:
|
||||
if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
|
||||
filtered["annotations"].append(ann)
|
||||
|
||||
filtered["tracks"] = list()
|
||||
for track in raw_gt_data["tracks"]:
|
||||
if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
|
||||
filtered["tracks"].append(track)
|
||||
|
||||
filtered["images"] = list()
|
||||
for image in raw_gt_data["images"]:
|
||||
if image["video_id"] not in unwanted_vid:
|
||||
filtered["images"].append(image)
|
||||
|
||||
filtered["categories"] = list()
|
||||
for cat in raw_gt_data["categories"]:
|
||||
if cat["id"] in valid_cat_ids:
|
||||
filtered["categories"].append(cat)
|
||||
|
||||
filtered["info"] = raw_gt_data["info"]
|
||||
filtered["licenses"] = raw_gt_data["licenses"]
|
||||
|
||||
return filtered
|
||||
438
test/yolov7-tracker/tracker/trackeval/datasets/visdrone.py
Normal file
@@ -0,0 +1,438 @@
|
||||
import os
|
||||
import csv
|
||||
import configparser
|
||||
import numpy as np
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from ._base_dataset import _BaseDataset
|
||||
from .. import utils
|
||||
from .. import _timing
|
||||
from ..utils import TrackEvalException
|
||||
|
||||
|
||||
class VisDrone2DBox(_BaseDataset):
|
||||
"""Dataset class for MOT Challenge 2D bounding box tracking"""
|
||||
|
||||
@staticmethod
|
||||
def get_default_dataset_config():
|
||||
"""Default class config values"""
|
||||
code_path = utils.get_code_path()
|
||||
default_config = {
|
||||
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
|
||||
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
|
||||
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
|
||||
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
|
||||
'CLASSES_TO_EVAL': ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'], # Valid: the ten VisDrone classes listed here
|
||||
'BENCHMARK': 'MOT17', # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
|
||||
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
|
||||
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
|
||||
'PRINT_CONFIG': True, # Whether to print current config
|
||||
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
|
||||
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
|
||||
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
|
||||
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
|
||||
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
|
||||
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
|
||||
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
|
||||
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
|
||||
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
|
||||
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
|
||||
# If True, then the middle 'benchmark-split' folder is skipped for both.
|
||||
}
|
||||
return default_config
|
||||
|
||||
def __init__(self, config=None):
|
||||
"""Initialise dataset, checking that all required files are present"""
|
||||
super().__init__()
|
||||
# Fill non-given config values with defaults
|
||||
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
|
||||
|
||||
self.benchmark = self.config['BENCHMARK']
|
||||
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
|
||||
self.gt_set = gt_set
|
||||
if not self.config['SKIP_SPLIT_FOL']:
|
||||
split_fol = gt_set
|
||||
else:
|
||||
split_fol = ''
|
||||
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
|
||||
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
|
||||
self.should_classes_combine = False
|
||||
self.use_super_categories = False
|
||||
self.data_is_zipped = self.config['INPUT_AS_ZIP']
|
||||
self.do_preproc = self.config['DO_PREPROC']
|
||||
|
||||
self.output_fol = self.config['OUTPUT_FOLDER']
|
||||
if self.output_fol is None:
|
||||
self.output_fol = self.tracker_fol
|
||||
|
||||
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
|
||||
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
|
||||
|
||||
# Get classes to eval
|
||||
self.valid_classes = ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
|
||||
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
|
||||
for cls in self.config['CLASSES_TO_EVAL']]
|
||||
if not all(self.class_list):
|
||||
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' + ', '.join(self.valid_classes) + ' are valid.')
|
||||
self.class_name_to_class_id = {'ignored': 0, 'pedestrian': 1, 'people': 2, 'bicycle': 3, 'car': 4, 'van': 5,
|
||||
'truck': 6, 'tricycle': 7, 'awning-tricycle': 8, 'bus': 9,
|
||||
'motor': 10, 'other': 11}
|
||||
self.valid_class_numbers = list(self.class_name_to_class_id.values())
|
||||
|
||||
# Get sequences to eval and check gt files exist
|
||||
self.seq_list, self.seq_lengths = self._get_seq_info()
|
||||
if len(self.seq_list) < 1:
|
||||
raise TrackEvalException('No sequences are selected to be evaluated.')
|
||||
|
||||
# Check gt files exist
|
||||
for seq in self.seq_list:
|
||||
if not self.data_is_zipped:
|
||||
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
|
||||
if not os.path.isfile(curr_file):
|
||||
print('GT file not found ' + curr_file)
|
||||
raise TrackEvalException('GT file not found for sequence: ' + seq)
|
||||
if self.data_is_zipped:
|
||||
curr_file = os.path.join(self.gt_fol, 'data.zip')
|
||||
if not os.path.isfile(curr_file):
|
||||
print('GT file not found ' + curr_file)
|
||||
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
|
||||
|
||||
# Get trackers to eval
|
||||
if self.config['TRACKERS_TO_EVAL'] is None:
|
||||
self.tracker_list = os.listdir(self.tracker_fol)
|
||||
else:
|
||||
self.tracker_list = self.config['TRACKERS_TO_EVAL']
|
||||
|
||||
if self.config['TRACKER_DISPLAY_NAMES'] is None:
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
|
||||
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
|
||||
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
|
||||
else:
|
||||
raise TrackEvalException('List of tracker files and tracker display names do not match.')
|
||||
|
||||
for tracker in self.tracker_list:
|
||||
if self.data_is_zipped:
|
||||
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
|
||||
if not os.path.isfile(curr_file):
|
||||
print('Tracker file not found: ' + curr_file)
|
||||
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
|
||||
else:
|
||||
for seq in self.seq_list:
|
||||
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
|
||||
if not os.path.isfile(curr_file):
|
||||
print('Tracker file not found: ' + curr_file)
|
||||
raise TrackEvalException(
|
||||
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
|
||||
curr_file))
|
||||
|
||||
def get_display_name(self, tracker):
|
||||
return self.tracker_to_disp[tracker]
|
||||
|
||||
def _get_seq_info(self):
|
||||
seq_list = []
|
||||
seq_lengths = {}
|
||||
if self.config["SEQ_INFO"]:
|
||||
seq_list = list(self.config["SEQ_INFO"].keys())
|
||||
seq_lengths = self.config["SEQ_INFO"]
|
||||
|
||||
# If a sequence length is None, try to read it from the sequence's seqinfo.ini file.
|
||||
for seq, seq_length in seq_lengths.items():
|
||||
if seq_length is None:
|
||||
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
|
||||
if not os.path.isfile(ini_file):
|
||||
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
|
||||
ini_data = configparser.ConfigParser()
|
||||
ini_data.read(ini_file)
|
||||
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
|
||||
|
||||
else:
|
||||
if self.config["SEQMAP_FILE"]:
|
||||
seqmap_file = self.config["SEQMAP_FILE"]
|
||||
else:
|
||||
if self.config["SEQMAP_FOLDER"] is None:
|
||||
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
|
||||
else:
|
||||
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
|
||||
if not os.path.isfile(seqmap_file):
|
||||
print('no seqmap found: ' + seqmap_file)
|
||||
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
|
||||
with open(seqmap_file) as fp:
|
||||
reader = csv.reader(fp)
|
||||
for i, row in enumerate(reader):
|
||||
if i == 0 or row[0] == '':
|
||||
continue
|
||||
seq = row[0]
|
||||
seq_list.append(seq)
|
||||
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
|
||||
if not os.path.isfile(ini_file):
|
||||
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
|
||||
ini_data = configparser.ConfigParser()
|
||||
ini_data.read(ini_file)
|
||||
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
|
||||
return seq_list, seq_lengths
|
||||
|
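The seqLength lookup above assumes a MOT-style seqinfo.ini next to each sequence's ground truth; a hypothetical example of such a file (only the seqLength field is actually read here):

[Sequence]
name=uav0000009_03358_v
imDir=img1
frameRate=30
seqLength=219
imWidth=1904
imHeight=1071
imExt=.jpg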
||||
def _load_raw_file(self, tracker, seq, is_gt):
|
||||
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
|
||||
|
||||
If is_gt, this returns a dict which contains the fields:
|
||||
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
|
||||
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
|
||||
|
||||
if not is_gt, this returns a dict which contains the fields:
|
||||
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[tracker_dets]: list (for each timestep) of lists of detections.
|
||||
"""
|
||||
# File location
|
||||
if self.data_is_zipped:
|
||||
if is_gt:
|
||||
zip_file = os.path.join(self.gt_fol, 'data.zip')
|
||||
else:
|
||||
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
|
||||
file = seq + '.txt'
|
||||
else:
|
||||
zip_file = None
|
||||
if is_gt:
|
||||
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
|
||||
else:
|
||||
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
|
||||
|
||||
# Load raw data from text file
|
||||
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
|
||||
|
||||
# Convert data to required format
|
||||
num_timesteps = self.seq_lengths[seq]
|
||||
data_keys = ['ids', 'classes', 'dets']
|
||||
if is_gt:
|
||||
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
|
||||
else:
|
||||
data_keys += ['tracker_confidences']
|
||||
raw_data = {key: [None] * num_timesteps for key in data_keys}
|
||||
|
||||
# Check for any extra time keys
|
||||
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
|
||||
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
|
||||
if len(extra_time_keys) > 0:
|
||||
if is_gt:
|
||||
text = 'Ground-truth'
|
||||
else:
|
||||
text = 'Tracking'
|
||||
raise TrackEvalException(
|
||||
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
|
||||
[str(x) for x in extra_time_keys]))
|
||||
|
||||
for t in range(num_timesteps):
|
||||
time_key = str(t+1)
|
||||
if time_key in read_data.keys():
|
||||
try:
|
||||
time_data = np.asarray(read_data[time_key], dtype=float)
|
||||
except ValueError:
|
||||
if is_gt:
|
||||
raise TrackEvalException(
|
||||
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
|
||||
else:
|
||||
raise TrackEvalException(
|
||||
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
|
||||
tracker, seq))
|
||||
try:
|
||||
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
|
||||
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
|
||||
except IndexError:
|
||||
if is_gt:
|
||||
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
|
||||
'columns in the data.' % seq
|
||||
raise TrackEvalException(err)
|
||||
else:
|
||||
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
|
||||
'columns in the data.' % (tracker, seq)
|
||||
raise TrackEvalException(err)
|
||||
if time_data.shape[1] >= 8:
|
||||
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
|
||||
else:
|
||||
if not is_gt:
|
||||
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
|
||||
else:
|
||||
raise TrackEvalException(
|
||||
'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
|
||||
seq, t))
|
||||
if is_gt:
|
||||
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
|
||||
raw_data['gt_extras'][t] = gt_extras_dict
|
||||
else:
|
||||
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
|
||||
else:
|
||||
raw_data['dets'][t] = np.empty((0, 4))
|
||||
raw_data['ids'][t] = np.empty(0).astype(int)
|
||||
raw_data['classes'][t] = np.empty(0).astype(int)
|
||||
if is_gt:
|
||||
gt_extras_dict = {'zero_marked': np.empty(0)}
|
||||
raw_data['gt_extras'][t] = gt_extras_dict
|
||||
else:
|
||||
raw_data['tracker_confidences'][t] = np.empty(0)
|
||||
if is_gt:
|
||||
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
|
||||
|
||||
if is_gt:
|
||||
key_map = {'ids': 'gt_ids',
|
||||
'classes': 'gt_classes',
|
||||
'dets': 'gt_dets'}
|
||||
else:
|
||||
key_map = {'ids': 'tracker_ids',
|
||||
'classes': 'tracker_classes',
|
||||
'dets': 'tracker_dets'}
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
raw_data['num_timesteps'] = num_timesteps
|
||||
raw_data['seq'] = seq
|
||||
return raw_data
|
||||
|
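The column slicing above (id from column 1, box from columns 2-5, confidence or zero-mark from column 6, class from column 7) assumes comma-separated MOT-style rows; a hypothetical example line:

# frame, id,   x,   y,  w,   h, conf, class, visibility
      1,  3, 794, 247, 71, 174,    1,     1,        0.8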
||||
@_timing.time
|
||||
def get_preprocessed_seq_data(self, raw_data, cls):
|
||||
""" Preprocess data for a single sequence for a single class ready for evaluation.
|
||||
Inputs:
|
||||
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
|
||||
- cls is the class to be evaluated.
|
||||
Outputs:
|
||||
- data is a dict containing all of the information that metrics need to perform evaluation.
|
||||
It contains the following fields:
|
||||
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
|
||||
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
|
||||
[similarity_scores]: list (for each timestep) of 2D NDArrays.
|
||||
Notes:
|
||||
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
|
||||
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
|
||||
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
|
||||
distractor class, or otherwise marked as to be removed.
|
||||
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
|
||||
other criteria (e.g. are too small).
|
||||
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
|
||||
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
|
||||
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
|
||||
unique within each timestep.
|
||||
|
||||
MOT Challenge:
|
||||
In MOT Challenge, the 4 preproc steps are as follows:
|
||||
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
|
||||
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
|
||||
objects are removed.
|
||||
3) There are no crowd ignore regions.
|
||||
4) All gt dets except pedestrian are removed; pedestrian gt dets marked as zero_marked are also removed.
|
||||
"""
|
||||
# Check that input data has unique ids
|
||||
self._check_unique_ids(raw_data)
|
||||
|
||||
# distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
|
||||
distractor_class_names = ['ignored', 'other']
|
||||
if self.benchmark == 'MOT20':
|
||||
distractor_class_names.append('non_mot_vehicle')
|
||||
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
|
||||
cls_id = self.class_name_to_class_id[cls]
|
||||
|
||||
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
|
||||
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
|
||||
unique_gt_ids = []
|
||||
unique_tracker_ids = []
|
||||
num_gt_dets = 0
|
||||
num_tracker_dets = 0
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
|
||||
# Get all data
|
||||
gt_ids = raw_data['gt_ids'][t]
|
||||
gt_dets = raw_data['gt_dets'][t]
|
||||
gt_classes = raw_data['gt_classes'][t]
|
||||
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
|
||||
|
||||
tracker_ids = raw_data['tracker_ids'][t]
|
||||
tracker_dets = raw_data['tracker_dets'][t]
|
||||
tracker_classes = raw_data['tracker_classes'][t]
|
||||
tracker_confidences = raw_data['tracker_confidences'][t]
|
||||
similarity_scores = raw_data['similarity_scores'][t]
|
||||
|
||||
# Evaluation is ONLY valid for pedestrian class
|
||||
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
|
||||
raise TrackEvalException(
|
||||
'Evaluation is only valid for pedestrian class. Non-pedestrian class (%i) found in sequence %s at '
|
||||
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
|
||||
|
||||
# Match tracker and gt dets (with the Hungarian algorithm) and remove tracker dets which match with gt dets
|
||||
# which are labeled as belonging to a distractor class.
|
||||
to_remove_tracker = np.array([], dtype=int)
|
||||
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
|
||||
|
||||
# Check all classes are valid:
|
||||
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
|
||||
if len(invalid_classes) > 0:
|
||||
print(' '.join([str(x) for x in invalid_classes]))
|
||||
raise TrackEvalException('Attempting to evaluate using invalid gt classes. '
|
||||
'This warning only triggers if preprocessing is performed, '
|
||||
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
|
||||
'Please either check your gt data, or disable preprocessing. '
|
||||
'The following invalid classes were found in timestep ' + str(t) + ': ' +
|
||||
' '.join([str(x) for x in invalid_classes]))
|
||||
|
||||
matching_scores = similarity_scores.copy()
|
||||
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
|
||||
match_rows, match_cols = linear_sum_assignment(-matching_scores)
|
||||
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
|
||||
match_rows = match_rows[actually_matched_mask]
|
||||
match_cols = match_cols[actually_matched_mask]
|
||||
|
||||
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
|
||||
to_remove_tracker = match_cols[is_distractor_class]
|
||||
|
||||
# Apply preprocessing to remove all unwanted tracker dets.
|
||||
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
|
||||
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
|
||||
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
|
||||
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
|
||||
|
||||
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
|
||||
# class (not applicable for MOT15)
|
||||
if self.do_preproc and self.benchmark != 'MOT15':
|
||||
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
|
||||
(np.equal(gt_classes, cls_id))
|
||||
else:
|
||||
# There are no classes for MOT15
|
||||
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
|
||||
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
|
||||
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
|
||||
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
|
||||
|
||||
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
|
||||
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
|
||||
num_tracker_dets += len(data['tracker_ids'][t])
|
||||
num_gt_dets += len(data['gt_ids'][t])
|
||||
|
||||
# Re-label IDs such that there are no empty IDs
|
||||
if len(unique_gt_ids) > 0:
|
||||
unique_gt_ids = np.unique(unique_gt_ids)
|
||||
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
|
||||
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
if len(data['gt_ids'][t]) > 0:
|
||||
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
|
||||
if len(unique_tracker_ids) > 0:
|
||||
unique_tracker_ids = np.unique(unique_tracker_ids)
|
||||
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
|
||||
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
if len(data['tracker_ids'][t]) > 0:
|
||||
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
|
||||
|
||||
# Record overview statistics.
|
||||
data['num_tracker_dets'] = num_tracker_dets
|
||||
data['num_gt_dets'] = num_gt_dets
|
||||
data['num_tracker_ids'] = len(unique_tracker_ids)
|
||||
data['num_gt_ids'] = len(unique_gt_ids)
|
||||
data['num_timesteps'] = raw_data['num_timesteps']
|
||||
data['seq'] = raw_data['seq']
|
||||
|
||||
# Ensure again that ids are unique per timestep after preproc.
|
||||
self._check_unique_ids(data, after_preproc=True)
|
||||
|
||||
return data
|
||||
|
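A compact sketch of preproc step 2 above (all indices hypothetical; the distractor ids 0 and 11 are 'ignored' and 'other' from the class mapping earlier): tracker detections whose matched gt box belongs to a distractor class are flagged for removal.

import numpy as np

gt_classes = np.array([1, 0])            # pedestrian, 'ignored' (a distractor class)
match_rows = np.array([0, 1])            # matched gt indices from the Hungarian step
match_cols = np.array([2, 5])            # matched tracker indices
distractor_classes = [0, 11]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
print(to_remove_tracker)                 # [5]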
||||
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
|
||||
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
|
||||
return similarity_scores
|
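_calculate_box_ious is provided by the base dataset class; as a sanity reference, here is a minimal stand-alone IoU for a single pair of 'xywh' boxes (a sketch, not the library routine):

import numpy as np

def iou_xywh(a, b):
    # Convert [x, y, w, h] to corner coordinates, then intersect.
    ax2, ay2 = a[0] + a[2], a[1] + a[3]
    bx2, by2 = b[0] + b[2], b[1] + b[3]
    iw = max(0.0, min(ax2, bx2) - max(a[0], b[0]))
    ih = max(0.0, min(ay2, by2) - max(a[1], b[1]))
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

print(iou_xywh(np.array([0, 0, 10, 10]), np.array([5, 0, 10, 10])))  # ~0.333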
||||
364
test/yolov7-tracker/tracker/trackeval/datasets/youtube_vis.py
Normal file
@@ -0,0 +1,364 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
from ._base_dataset import _BaseDataset
|
||||
from ..utils import TrackEvalException
|
||||
from .. import utils
|
||||
from .. import _timing
|
||||
|
||||
|
||||
class YouTubeVIS(_BaseDataset):
|
||||
"""Dataset class for YouTubeVIS tracking"""
|
||||
|
||||
@staticmethod
|
||||
def get_default_dataset_config():
|
||||
"""Default class config values"""
|
||||
code_path = utils.get_code_path()
|
||||
default_config = {
|
||||
'GT_FOLDER': os.path.join(code_path, 'data/gt/youtube_vis/'), # Location of GT data
|
||||
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/youtube_vis/'),
|
||||
# Trackers location
|
||||
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
|
||||
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
|
||||
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
|
||||
'SPLIT_TO_EVAL': 'train_sub_split', # Valid: 'train', 'val', 'train_sub_split'
|
||||
'PRINT_CONFIG': True, # Whether to print current config
|
||||
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
|
||||
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
|
||||
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
|
||||
}
|
||||
return default_config
|
||||
|
||||
def __init__(self, config=None):
|
||||
"""Initialise dataset, checking that all required files are present"""
|
||||
super().__init__()
|
||||
# Fill non-given config values with defaults
|
||||
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
|
||||
self.gt_fol = self.config['GT_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
|
||||
self.tracker_fol = self.config['TRACKERS_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
|
||||
self.use_super_categories = False
|
||||
self.should_classes_combine = True
|
||||
|
||||
self.output_fol = self.config['OUTPUT_FOLDER']
|
||||
if self.output_fol is None:
|
||||
self.output_fol = self.tracker_fol
|
||||
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
|
||||
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
|
||||
|
||||
if not os.path.exists(self.gt_fol):
|
||||
print("GT folder not found: " + self.gt_fol)
|
||||
raise TrackEvalException("GT folder not found: " + os.path.basename(self.gt_fol))
|
||||
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
|
||||
if len(gt_dir_files) != 1:
|
||||
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
|
||||
|
||||
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
|
||||
self.gt_data = json.load(f)
|
||||
|
||||
# Get classes to eval
|
||||
self.valid_classes = [cls['name'] for cls in self.gt_data['categories']]
|
||||
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
|
||||
|
||||
if self.config['CLASSES_TO_EVAL']:
|
||||
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
|
||||
for cls in self.config['CLASSES_TO_EVAL']]
|
||||
if not all(self.class_list):
|
||||
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
|
||||
', '.join(self.valid_classes) + ' are valid.')
|
||||
else:
|
||||
self.class_list = [cls['name'] for cls in self.gt_data['categories']]
|
||||
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
|
||||
|
||||
# Get sequences to eval and check gt files exist
|
||||
self.seq_list = [vid['file_names'][0].split('/')[0] for vid in self.gt_data['videos']]
|
||||
self.seq_name_to_seq_id = {vid['file_names'][0].split('/')[0]: vid['id'] for vid in self.gt_data['videos']}
|
||||
self.seq_lengths = {vid['id']: len(vid['file_names']) for vid in self.gt_data['videos']}
|
||||
|
||||
# encode masks and compute track areas
|
||||
self._prepare_gt_annotations()
|
||||
|
||||
# Get trackers to eval
|
||||
if self.config['TRACKERS_TO_EVAL'] is None:
|
||||
self.tracker_list = os.listdir(self.tracker_fol)
|
||||
else:
|
||||
self.tracker_list = self.config['TRACKERS_TO_EVAL']
|
||||
|
||||
if self.config['TRACKER_DISPLAY_NAMES'] is None:
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
|
||||
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
|
||||
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
|
||||
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
|
||||
else:
|
||||
raise TrackEvalException('List of tracker files and tracker display names do not match.')
|
||||
|
||||
# counter for globally unique track IDs
|
||||
self.global_tid_counter = 0
|
||||
|
||||
self.tracker_data = dict()
|
||||
for tracker in self.tracker_list:
|
||||
tracker_dir_path = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
|
||||
tr_dir_files = [file for file in os.listdir(tracker_dir_path) if file.endswith('.json')]
|
||||
if len(tr_dir_files) != 1:
|
||||
raise TrackEvalException(tracker_dir_path + ' does not contain exactly one json file.')
|
||||
|
||||
with open(os.path.join(tracker_dir_path, tr_dir_files[0])) as f:
|
||||
curr_data = json.load(f)
|
||||
|
||||
self.tracker_data[tracker] = curr_data
|
||||
|
||||
def get_display_name(self, tracker):
|
||||
return self.tracker_to_disp[tracker]
|
||||
|
||||
def _load_raw_file(self, tracker, seq, is_gt):
|
||||
"""Load a file (gt or tracker) in the YouTubeVIS format
|
||||
If is_gt, this returns a dict which contains the fields:
|
||||
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets]: list (for each timestep) of lists of detections.
|
||||
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
|
||||
keys and corresponding segmentations as values) for each track
|
||||
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_iscrowd]: dictionary with class values
|
||||
as keys and lists (for each track) as values
|
||||
|
||||
if not is_gt, this returns a dict which contains the fields:
|
||||
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
|
||||
[tracker_dets]: list (for each timestep) of lists of detections.
|
||||
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
|
||||
keys and corresponding segmentations as values) for each track
|
||||
[classes_to_dt_track_ids, classes_to_dt_track_areas]: dictionary with class values as keys and lists as values
|
||||
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
|
||||
"""
|
||||
# select sequence tracks
|
||||
seq_id = self.seq_name_to_seq_id[seq]
|
||||
if is_gt:
|
||||
tracks = [ann for ann in self.gt_data['annotations'] if ann['video_id'] == seq_id]
|
||||
else:
|
||||
tracks = self._get_tracker_seq_tracks(tracker, seq_id)
|
||||
|
||||
# Convert data to required format
|
||||
num_timesteps = self.seq_lengths[seq_id]
|
||||
data_keys = ['ids', 'classes', 'dets']
|
||||
if not is_gt:
|
||||
data_keys += ['tracker_confidences']
|
||||
raw_data = {key: [None] * num_timesteps for key in data_keys}
|
||||
for t in range(num_timesteps):
|
||||
raw_data['dets'][t] = [track['segmentations'][t] for track in tracks if track['segmentations'][t]]
|
||||
raw_data['ids'][t] = np.atleast_1d([track['id'] for track in tracks
|
||||
if track['segmentations'][t]]).astype(int)
|
||||
raw_data['classes'][t] = np.atleast_1d([track['category_id'] for track in tracks
|
||||
if track['segmentations'][t]]).astype(int)
|
||||
if not is_gt:
|
||||
raw_data['tracker_confidences'][t] = np.atleast_1d([track['score'] for track in tracks
|
||||
if track['segmentations'][t]]).astype(float)
|
||||
|
||||
if is_gt:
|
||||
key_map = {'ids': 'gt_ids',
|
||||
'classes': 'gt_classes',
|
||||
'dets': 'gt_dets'}
|
||||
else:
|
||||
key_map = {'ids': 'tracker_ids',
|
||||
'classes': 'tracker_classes',
|
||||
'dets': 'tracker_dets'}
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
|
||||
all_cls_ids = {self.class_name_to_class_id[cls] for cls in self.class_list}
|
||||
classes_to_tracks = {cls: [track for track in tracks if track['category_id'] == cls] for cls in all_cls_ids}
|
||||
|
||||
# mapping from classes to track representations and track information
|
||||
raw_data['classes_to_tracks'] = {cls: [{i: track['segmentations'][i]
|
||||
for i in range(len(track['segmentations']))} for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
|
||||
if is_gt:
|
||||
raw_data['classes_to_gt_track_iscrowd'] = {cls: [track['iscrowd'] for track in tracks]
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
else:
|
||||
raw_data['classes_to_dt_track_scores'] = {cls: np.array([track['score'] for track in tracks])
|
||||
for cls, tracks in classes_to_tracks.items()}
|
||||
|
||||
if is_gt:
|
||||
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
|
||||
'classes_to_track_ids': 'classes_to_gt_track_ids',
|
||||
'classes_to_track_areas': 'classes_to_gt_track_areas'}
|
||||
else:
|
||||
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
|
||||
'classes_to_track_ids': 'classes_to_dt_track_ids',
|
||||
'classes_to_track_areas': 'classes_to_dt_track_areas'}
|
||||
for k, v in key_map.items():
|
||||
raw_data[v] = raw_data.pop(k)
|
||||
|
||||
raw_data['num_timesteps'] = num_timesteps
|
||||
raw_data['seq'] = seq
|
||||
return raw_data
|
||||
|
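To make the track['segmentations'][t] filtering above concrete, a hypothetical YouTubeVIS-style track record (None marks frames where the track is absent; the RLE payloads are placeholders):

track_example = {
    'id': 5,
    'video_id': 2,
    'category_id': 3,
    'score': 0.87,                        # present for tracker tracks only
    'segmentations': [None,               # frame 0: track not visible
                      {'size': [720, 1280], 'counts': '...'},  # frame 1: RLE mask
                      {'size': [720, 1280], 'counts': '...'},  # frame 2: RLE mask
                      None],              # frame 3: track not visible
}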
||||
@_timing.time
|
||||
def get_preprocessed_seq_data(self, raw_data, cls):
|
||||
""" Preprocess data for a single sequence for a single class ready for evaluation.
|
||||
Inputs:
|
||||
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
|
||||
- cls is the class to be evaluated.
|
||||
Outputs:
|
||||
- data is a dict containing all of the information that metrics need to perform evaluation.
|
||||
It contains the following fields:
|
||||
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
|
||||
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
|
||||
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
|
||||
[similarity_scores]: list (for each timestep) of 2D NDArrays.
|
||||
Notes:
|
||||
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
|
||||
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
|
||||
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
|
||||
distractor class, or otherwise marked as to be removed.
|
||||
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
|
||||
other criteria (e.g. are too small).
|
||||
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
|
||||
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
|
||||
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
|
||||
unique within each timestep.
|
||||
YouTubeVIS:
|
||||
In YouTubeVIS, the 4 preproc steps are as follows:
|
||||
1) There are 40 classes which are evaluated separately.
|
||||
2) No matched tracker dets are removed.
|
||||
3) No unmatched tracker dets are removed.
|
||||
4) No gt dets are removed.
|
||||
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
|
||||
and the tracks from the tracker data are sorted according to the tracker confidence.
|
||||
"""
|
||||
cls_id = self.class_name_to_class_id[cls]
|
||||
|
||||
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
|
||||
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
|
||||
unique_gt_ids = []
|
||||
unique_tracker_ids = []
|
||||
num_gt_dets = 0
|
||||
num_tracker_dets = 0
|
||||
|
||||
for t in range(raw_data['num_timesteps']):
|
||||
|
||||
# Only extract relevant dets for this class for eval (cls)
|
||||
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
|
||||
gt_class_mask = gt_class_mask.astype(np.bool)
|
||||
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
|
||||
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
|
||||
|
||||
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
|
||||
tracker_class_mask = tracker_class_mask.astype(np.bool)
|
||||
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
|
||||
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
|
||||
tracker_class_mask[ind]]
|
||||
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
|
||||
|
||||
data['tracker_ids'][t] = tracker_ids
|
||||
data['tracker_dets'][t] = tracker_dets
|
||||
data['gt_ids'][t] = gt_ids
|
||||
data['gt_dets'][t] = gt_dets
|
||||
data['similarity_scores'][t] = similarity_scores
|
||||
|
||||
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
|
||||
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
|
||||
num_tracker_dets += len(data['tracker_ids'][t])
|
||||
num_gt_dets += len(data['gt_ids'][t])
|
||||
|
||||
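        # The relabelling below maps the raw ids onto a contiguous range 0..N-1, so
        # downstream metrics can use gt/tracker ids directly as indices into count arrays.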
        # Re-label IDs such that there are no empty IDs
        if len(unique_gt_ids) > 0:
            unique_gt_ids = np.unique(unique_gt_ids)
            gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
            gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['gt_ids'][t]) > 0:
                    data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
        if len(unique_tracker_ids) > 0:
            unique_tracker_ids = np.unique(unique_tracker_ids)
            tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
            tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
            for t in range(raw_data['num_timesteps']):
                if len(data['tracker_ids'][t]) > 0:
                    data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)

        # Ensure that ids are unique per timestep.
        self._check_unique_ids(data)

        # Record overview statistics.
        data['num_tracker_dets'] = num_tracker_dets
        data['num_gt_dets'] = num_gt_dets
        data['num_tracker_ids'] = len(unique_tracker_ids)
        data['num_gt_ids'] = len(unique_gt_ids)
        data['num_timesteps'] = raw_data['num_timesteps']
        data['seq'] = raw_data['seq']

        # get track representations
        data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
        data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
        data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
        data['gt_track_iscrowd'] = raw_data['classes_to_gt_track_iscrowd'][cls_id]
        data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
        data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
        data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
        data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
        data['iou_type'] = 'mask'

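        # argsort on the negated scores gives a descending order; "mergesort" is a
        # stable sort, so tracks with equal confidence keep their original order.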
        # sort tracker data tracks by tracker confidence scores
        if data['dt_tracks']:
            idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
            data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
            data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
            data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
            data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]

        return data

    def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
        similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
        return similarity_scores

    def _prepare_gt_annotations(self):
        """
        Prepares GT data by rle encoding segmentations and computing the average track area.
        :return: None
        """
        # only loaded when needed to reduce minimum requirements
        from pycocotools import mask as mask_utils

        for track in self.gt_data['annotations']:
            h = track['height']
            w = track['width']
            for i, seg in enumerate(track['segmentations']):
                if seg:
                    track['segmentations'][i] = mask_utils.frPyObjects(seg, h, w)
            areas = [a for a in track['areas'] if a]
            if len(areas) == 0:
                track['area'] = 0
            else:
                track['area'] = np.array(areas).mean()

    def _get_tracker_seq_tracks(self, tracker, seq_id):
        """
        Prepares tracker data for a given sequence. Extracts all annotations for the given sequence ID, computes
        the average track area and assigns a track ID.
        :param tracker: the given tracker
        :param seq_id: the sequence ID
        :return: the extracted tracks
        """
        # only loaded when needed to reduce minimum requirements
        from pycocotools import mask as mask_utils

        tracks = [ann for ann in self.tracker_data[tracker] if ann['video_id'] == seq_id]
        for track in tracks:
            track['areas'] = []
            for seg in track['segmentations']:
                if seg:
                    track['areas'].append(mask_utils.area(seg))
                else:
                    track['areas'].append(None)
            areas = [a for a in track['areas'] if a]
            if len(areas) == 0:
                track['area'] = 0
            else:
                track['area'] = np.array(areas).mean()
            track['id'] = self.global_tid_counter
            self.global_tid_counter += 1
        return tracks
225
test/yolov7-tracker/tracker/trackeval/eval.py
Normal file
@@ -0,0 +1,225 @@
import time
import traceback
from multiprocessing.pool import Pool
from functools import partial
import os
from . import utils
from .utils import TrackEvalException
from . import _timing
from .metrics import Count

try:
    import tqdm
    TQDM_IMPORTED = True
except ImportError as _:
    TQDM_IMPORTED = False


class Evaluator:
    """Evaluator class for evaluating different metrics for different datasets"""

    @staticmethod
    def get_default_eval_config():
        """Returns the default config values for evaluation"""
        code_path = utils.get_code_path()
        default_config = {
            'USE_PARALLEL': False,
            'NUM_PARALLEL_CORES': 8,
            'BREAK_ON_ERROR': True,  # Raises exception and exits with error
            'RETURN_ON_ERROR': False,  # if not BREAK_ON_ERROR, then returns from function on error
            'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'),  # if not None, save any errors into a log file.

            'PRINT_RESULTS': True,
            'PRINT_ONLY_COMBINED': False,
            'PRINT_CONFIG': True,
            'TIME_PROGRESS': True,
            'DISPLAY_LESS_PROGRESS': True,

            'OUTPUT_SUMMARY': True,
            'OUTPUT_EMPTY_CLASSES': True,  # If False, summary files are not output for classes with no detections
            'OUTPUT_DETAILED': True,
            'PLOT_CURVES': True,
        }
        return default_config

    def __init__(self, config=None):
        """Initialise the evaluator with a config file"""
        self.config = utils.init_config(config, self.get_default_eval_config(), 'Eval')
        # Only run timing analysis if not run in parallel.
        if self.config['TIME_PROGRESS'] and not self.config['USE_PARALLEL']:
            _timing.DO_TIMING = True
            if self.config['DISPLAY_LESS_PROGRESS']:
                _timing.DISPLAY_LESS_PROGRESS = True

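    # Usage sketch (illustrative only: the dataset class name and configs below are
    # assumptions based on the usual TrackEval layout, not fixed by this file):
    #     from trackeval import Evaluator, datasets, metrics
    #     evaluator = Evaluator({'USE_PARALLEL': False})
    #     dataset_list = [datasets.MotChallenge2DBox(dataset_config)]  # hypothetical config
    #     metrics_list = [metrics.HOTA(), metrics.CLEAR(), metrics.Identity()]
    #     output_res, output_msg = evaluator.evaluate(dataset_list, metrics_list)
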
    @_timing.time
    def evaluate(self, dataset_list, metrics_list, show_progressbar=False):
        """Evaluate a set of metrics on a set of datasets"""
        config = self.config
        metrics_list = metrics_list + [Count()]  # Count metrics are always run
        metric_names = utils.validate_metrics_list(metrics_list)
        dataset_names = [dataset.get_name() for dataset in dataset_list]
        output_res = {}
        output_msg = {}

        for dataset, dataset_name in zip(dataset_list, dataset_names):
            # Get dataset info about what to evaluate
            output_res[dataset_name] = {}
            output_msg[dataset_name] = {}
            tracker_list, seq_list, class_list = dataset.get_eval_info()
            print('\nEvaluating %i tracker(s) on %i sequence(s) for %i class(es) on %s dataset using the following '
                  'metrics: %s\n' % (len(tracker_list), len(seq_list), len(class_list), dataset_name,
                                     ', '.join(metric_names)))

            # Evaluate each tracker
            for tracker in tracker_list:
                # if not config['BREAK_ON_ERROR'] then go to next tracker without breaking
                try:
                    # Evaluate each sequence in parallel or in series.
                    # returns a nested dict (res), indexed like: res[seq][class][metric_name][sub_metric field]
                    # e.g. res[seq_0001][pedestrian][hota][DetA]
                    print('\nEvaluating %s\n' % tracker)
                    time_start = time.time()
                    if config['USE_PARALLEL']:
                        if show_progressbar and TQDM_IMPORTED:
                            seq_list_sorted = sorted(seq_list)

                            with Pool(config['NUM_PARALLEL_CORES']) as pool, tqdm.tqdm(total=len(seq_list)) as pbar:
                                _eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
                                                         class_list=class_list, metrics_list=metrics_list,
                                                         metric_names=metric_names)
                                results = []
                                for r in pool.imap(_eval_sequence, seq_list_sorted,
                                                   chunksize=20):
                                    results.append(r)
                                    pbar.update()
                                res = dict(zip(seq_list_sorted, results))

                        else:
                            with Pool(config['NUM_PARALLEL_CORES']) as pool:
                                _eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
                                                         class_list=class_list, metrics_list=metrics_list,
                                                         metric_names=metric_names)
                                results = pool.map(_eval_sequence, seq_list)
                                res = dict(zip(seq_list, results))
                    else:
                        res = {}
                        if show_progressbar and TQDM_IMPORTED:
                            seq_list_sorted = sorted(seq_list)
                            for curr_seq in tqdm.tqdm(seq_list_sorted):
                                res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
                                                              metric_names)
                        else:
                            for curr_seq in sorted(seq_list):
                                res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
                                                              metric_names)

                    # Combine results over all sequences and then over all classes

                    # collecting combined cls keys (cls averaged, det averaged, super classes)
                    combined_cls_keys = []
                    res['COMBINED_SEQ'] = {}
                    # combine sequences for each class
                    for c_cls in class_list:
                        res['COMBINED_SEQ'][c_cls] = {}
                        for metric, metric_name in zip(metrics_list, metric_names):
                            curr_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value in res.items() if
                                        seq_key != 'COMBINED_SEQ'}
                            res['COMBINED_SEQ'][c_cls][metric_name] = metric.combine_sequences(curr_res)
                    # combine classes
                    if dataset.should_classes_combine:
                        combined_cls_keys += ['cls_comb_cls_av', 'cls_comb_det_av', 'all']
                        res['COMBINED_SEQ']['cls_comb_cls_av'] = {}
                        res['COMBINED_SEQ']['cls_comb_det_av'] = {}
                        for metric, metric_name in zip(metrics_list, metric_names):
                            cls_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
                                       res['COMBINED_SEQ'].items() if cls_key not in combined_cls_keys}
                            res['COMBINED_SEQ']['cls_comb_cls_av'][metric_name] = \
                                metric.combine_classes_class_averaged(cls_res)
                            res['COMBINED_SEQ']['cls_comb_det_av'][metric_name] = \
                                metric.combine_classes_det_averaged(cls_res)
                    # combine classes to super classes
                    if dataset.use_super_categories:
                        for cat, sub_cats in dataset.super_categories.items():
                            combined_cls_keys.append(cat)
                            res['COMBINED_SEQ'][cat] = {}
                            for metric, metric_name in zip(metrics_list, metric_names):
                                cat_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
                                           res['COMBINED_SEQ'].items() if cls_key in sub_cats}
                                res['COMBINED_SEQ'][cat][metric_name] = metric.combine_classes_det_averaged(cat_res)

                    # Print and output results in various formats
                    if config['TIME_PROGRESS']:
                        print('\nAll sequences for %s finished in %.2f seconds' % (tracker, time.time() - time_start))
                    output_fol = dataset.get_output_fol(tracker)
                    tracker_display_name = dataset.get_display_name(tracker)
                    for c_cls in res['COMBINED_SEQ'].keys():  # class_list + combined classes if calculated
                        summaries = []
                        details = []
                        num_dets = res['COMBINED_SEQ'][c_cls]['Count']['Dets']
                        if config['OUTPUT_EMPTY_CLASSES'] or num_dets > 0:
                            for metric, metric_name in zip(metrics_list, metric_names):
                                # for combined classes there is no per sequence evaluation
                                if c_cls in combined_cls_keys:
                                    table_res = {'COMBINED_SEQ': res['COMBINED_SEQ'][c_cls][metric_name]}
                                else:
                                    table_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value
                                                 in res.items()}

                                if config['PRINT_RESULTS'] and config['PRINT_ONLY_COMBINED']:
                                    dont_print = dataset.should_classes_combine and c_cls not in combined_cls_keys
                                    if not dont_print:
                                        metric.print_table({'COMBINED_SEQ': table_res['COMBINED_SEQ']},
                                                           tracker_display_name, c_cls)
                                elif config['PRINT_RESULTS']:
                                    metric.print_table(table_res, tracker_display_name, c_cls)
                                if config['OUTPUT_SUMMARY']:
                                    summaries.append(metric.summary_results(table_res))
                                if config['OUTPUT_DETAILED']:
                                    details.append(metric.detailed_results(table_res))
                                if config['PLOT_CURVES']:
                                    metric.plot_single_tracker_results(table_res, tracker_display_name, c_cls,
                                                                       output_fol)
                            if config['OUTPUT_SUMMARY']:
                                utils.write_summary_results(summaries, c_cls, output_fol)
                            if config['OUTPUT_DETAILED']:
                                utils.write_detailed_results(details, c_cls, output_fol)

                    # Output for returning from function
                    output_res[dataset_name][tracker] = res
                    output_msg[dataset_name][tracker] = 'Success'

                except Exception as err:
                    output_res[dataset_name][tracker] = None
                    if type(err) == TrackEvalException:
                        output_msg[dataset_name][tracker] = str(err)
                    else:
                        output_msg[dataset_name][tracker] = 'Unknown error occurred.'
                    print('Tracker %s was unable to be evaluated.' % tracker)
                    print(err)
                    traceback.print_exc()
                    if config['LOG_ON_ERROR'] is not None:
                        with open(config['LOG_ON_ERROR'], 'a') as f:
                            print(dataset_name, file=f)
                            print(tracker, file=f)
                            print(traceback.format_exc(), file=f)
                            print('\n\n\n', file=f)
                    if config['BREAK_ON_ERROR']:
                        raise err
                    elif config['RETURN_ON_ERROR']:
                        return output_res, output_msg

        return output_res, output_msg


@_timing.time
def eval_sequence(seq, dataset, tracker, class_list, metrics_list, metric_names):
    """Function for evaluating a single sequence"""

    raw_data = dataset.get_raw_seq_data(tracker, seq)
    seq_res = {}
    for cls in class_list:
        seq_res[cls] = {}
        data = dataset.get_preprocessed_seq_data(raw_data, cls)
        for metric, met_name in zip(metrics_list, metric_names):
            seq_res[cls][met_name] = metric.eval_sequence(data)
    return seq_res
8
test/yolov7-tracker/tracker/trackeval/metrics/__init__.py
Normal file
@@ -0,0 +1,8 @@
from .hota import HOTA
from .clear import CLEAR
from .identity import Identity
from .count import Count
from .j_and_f import JAndF
from .track_map import TrackMAP
from .vace import VACE
from .ideucl import IDEucl
133
test/yolov7-tracker/tracker/trackeval/metrics/_base_metric.py
Normal file
@@ -0,0 +1,133 @@

import numpy as np
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException


class _BaseMetric(ABC):
    @abstractmethod
    def __init__(self):
        self.plottable = False
        self.integer_fields = []
        self.float_fields = []
        self.array_labels = []
        self.integer_array_fields = []
        self.float_array_fields = []
        self.fields = []
        self.summary_fields = []
        self.registered = False

    #####################################################################
    # Abstract functions for subclasses to implement

    @_timing.time
    @abstractmethod
    def eval_sequence(self, data):
        ...

    @abstractmethod
    def combine_sequences(self, all_res):
        ...

    @abstractmethod
    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        ...

    @abstractmethod
    def combine_classes_det_averaged(self, all_res):
        ...

    def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
        """Plot results of metrics, only valid for metrics with self.plottable"""
        if self.plottable:
            raise NotImplementedError('plot_results is not implemented for metric %s' % self.get_name())
        else:
            pass

    #####################################################################
    # Helper functions which are useful for all metrics:

    @classmethod
    def get_name(cls):
        return cls.__name__

    @staticmethod
    def _combine_sum(all_res, field):
        """Combine sequence results via sum"""
        return sum([all_res[k][field] for k in all_res.keys()])

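    # The weighted average below computes sum_k(value_k * weight_k) / max(1, W), where
    # W = comb_res[weight_field] must already hold the combined (summed) weights;
    # e.g. HOTA combines AssA/AssRe/AssPr across sequences weighted by HOTA_TP.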
    @staticmethod
    def _combine_weighted_av(all_res, field, comb_res, weight_field):
        """Combine sequence results via weighted average"""
        return sum([all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]) / np.maximum(1.0, comb_res[
            weight_field])

    def print_table(self, table_res, tracker, cls):
        """Prints table of results for all sequences"""
        print('')
        metric_name = self.get_name()
        self._row_print([metric_name + ': ' + tracker + '-' + cls] + self.summary_fields)
        for seq, results in sorted(table_res.items()):
            if seq == 'COMBINED_SEQ':
                continue
            summary_res = self._summary_row(results)
            self._row_print([seq] + summary_res)
        summary_res = self._summary_row(table_res['COMBINED_SEQ'])
        self._row_print(['COMBINED'] + summary_res)

    def _summary_row(self, results_):
        vals = []
        for h in self.summary_fields:
            if h in self.float_array_fields:
                vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
            elif h in self.float_fields:
                vals.append("{0:1.5g}".format(100 * float(results_[h])))
            elif h in self.integer_fields:
                vals.append("{0:d}".format(int(results_[h])))
            else:
                raise NotImplementedError("Summary function not implemented for this field type.")
        return vals

    @staticmethod
    def _row_print(*argv):
        """Prints results in evenly spaced rows, with more space in the first column"""
        if len(argv) == 1:
            argv = argv[0]
        to_print = '%-35s' % argv[0]
        for v in argv[1:]:
            to_print += '%-10s' % str(v)
        print(to_print)

    def summary_results(self, table_res):
        """Returns a simple summary of final results for a tracker"""
        return dict(zip(self.summary_fields, self._summary_row(table_res['COMBINED_SEQ'])))

    def detailed_results(self, table_res):
        """Returns detailed final results for a tracker"""
        # Get detailed field information
        detailed_fields = self.float_fields + self.integer_fields
        for h in self.float_array_fields + self.integer_array_fields:
            for alpha in [int(100*x) for x in self.array_labels]:
                detailed_fields.append(h + '___' + str(alpha))
            detailed_fields.append(h + '___AUC')

        # Get detailed results
        detailed_results = {}
        for seq, res in table_res.items():
            detailed_row = self._detailed_row(res)
            if len(detailed_row) != len(detailed_fields):
                raise TrackEvalException(
                    'Field names and data have different sizes (%i and %i)' % (len(detailed_row), len(detailed_fields)))
            detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
        return detailed_results

    def _detailed_row(self, res):
        detailed_row = []
        for h in self.float_fields + self.integer_fields:
            detailed_row.append(res[h])
        for h in self.float_array_fields + self.integer_array_fields:
            for i, alpha in enumerate([int(100 * x) for x in self.array_labels]):
                detailed_row.append(res[h][i])
            detailed_row.append(np.mean(res[h]))
        return detailed_row
186
test/yolov7-tracker/tracker/trackeval/metrics/clear.py
Normal file
@@ -0,0 +1,186 @@

import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils


class CLEAR(_BaseMetric):
    """Class which implements the CLEAR metrics"""

    @staticmethod
    def get_default_config():
        """Default class config values"""
        default_config = {
            'THRESHOLD': 0.5,  # Similarity score threshold required for a TP match. Default 0.5.
            'PRINT_CONFIG': True,  # Whether to print the config information on init. Default: False.
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        main_integer_fields = ['CLR_TP', 'CLR_FN', 'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag']
        extra_integer_fields = ['CLR_Frames']
        self.integer_fields = main_integer_fields + extra_integer_fields
        main_float_fields = ['MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'sMOTA']
        extra_float_fields = ['CLR_F1', 'FP_per_frame', 'MOTAL', 'MOTP_sum']
        self.float_fields = main_float_fields + extra_float_fields
        self.fields = self.float_fields + self.integer_fields
        self.summed_fields = self.integer_fields + ['MOTP_sum']
        self.summary_fields = main_float_fields + main_integer_fields

        # Configuration options:
        self.config = utils.init_config(config, self.get_default_config(), self.get_name())
        self.threshold = float(self.config['THRESHOLD'])

    @_timing.time
    def eval_sequence(self, data):
        """Calculates CLEAR metrics for one sequence"""
        # Initialise results
        res = {}
        for field in self.fields:
            res[field] = 0

        # Return result quickly if tracker or gt sequence is empty
        if data['num_tracker_dets'] == 0:
            res['CLR_FN'] = data['num_gt_dets']
            res['ML'] = data['num_gt_ids']
            res['MLR'] = 1.0
            return res
        if data['num_gt_dets'] == 0:
            res['CLR_FP'] = data['num_tracker_dets']
            res['MLR'] = 1.0
            return res

        # Variables counting global association
        num_gt_ids = data['num_gt_ids']
        gt_id_count = np.zeros(num_gt_ids)  # For MT/ML/PT
        gt_matched_count = np.zeros(num_gt_ids)  # For MT/ML/PT
        gt_frag_count = np.zeros(num_gt_ids)  # For Frag

        # Note that IDSWs are counted based on the last time each gt_id was present (any number of frames previously),
        # but are only used in matching to continue current tracks based on the gt_id in the single previous timestep.
        prev_tracker_id = np.nan * np.zeros(num_gt_ids)  # For scoring IDSW
        prev_timestep_tracker_id = np.nan * np.zeros(num_gt_ids)  # For matching IDSW

        # Calculate scores for each timestep
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Deal with the case that there are no gt_det/tracker_det in a timestep.
            if len(gt_ids_t) == 0:
                res['CLR_FP'] += len(tracker_ids_t)
                continue
            if len(tracker_ids_t) == 0:
                res['CLR_FN'] += len(gt_ids_t)
                gt_id_count[gt_ids_t] += 1
                continue

            # Calc score matrix to first minimise IDSWs from previous frame, and then maximise MOTP secondarily
            similarity = data['similarity_scores'][t]
            score_mat = (tracker_ids_t[np.newaxis, :] == prev_timestep_tracker_id[gt_ids_t[:, np.newaxis]])
            score_mat = 1000 * score_mat + similarity
            score_mat[similarity < self.threshold - np.finfo('float').eps] = 0
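            # Since similarity scores are at most 1, the +1000 bonus guarantees that any
            # pair continuing last timestep's assignment outranks every new pairing, so
            # the assignment minimises ID switches first and maximises MOTP second.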

            # Hungarian algorithm to find best matches
            match_rows, match_cols = linear_sum_assignment(-score_mat)
            actually_matched_mask = score_mat[match_rows, match_cols] > 0 + np.finfo('float').eps
            match_rows = match_rows[actually_matched_mask]
            match_cols = match_cols[actually_matched_mask]

            matched_gt_ids = gt_ids_t[match_rows]
            matched_tracker_ids = tracker_ids_t[match_cols]

            # Calc IDSW for MOTA
            prev_matched_tracker_ids = prev_tracker_id[matched_gt_ids]
            is_idsw = (np.logical_not(np.isnan(prev_matched_tracker_ids))) & (
                np.not_equal(matched_tracker_ids, prev_matched_tracker_ids))
            res['IDSW'] += np.sum(is_idsw)

            # Update counters for MT/ML/PT/Frag and record for IDSW/Frag for next timestep
            gt_id_count[gt_ids_t] += 1
            gt_matched_count[matched_gt_ids] += 1
            not_previously_tracked = np.isnan(prev_timestep_tracker_id)
            prev_tracker_id[matched_gt_ids] = matched_tracker_ids
            prev_timestep_tracker_id[:] = np.nan
            prev_timestep_tracker_id[matched_gt_ids] = matched_tracker_ids
            currently_tracked = np.logical_not(np.isnan(prev_timestep_tracker_id))
            gt_frag_count += np.logical_and(not_previously_tracked, currently_tracked)

            # Calculate and accumulate basic statistics
            num_matches = len(matched_gt_ids)
            res['CLR_TP'] += num_matches
            res['CLR_FN'] += len(gt_ids_t) - num_matches
            res['CLR_FP'] += len(tracker_ids_t) - num_matches
            if num_matches > 0:
                res['MOTP_sum'] += sum(similarity[match_rows, match_cols])

        # Calculate MT/ML/PT/Frag/MOTP
        tracked_ratio = gt_matched_count[gt_id_count > 0] / gt_id_count[gt_id_count > 0]
        res['MT'] = np.sum(np.greater(tracked_ratio, 0.8))
        res['PT'] = np.sum(np.greater_equal(tracked_ratio, 0.2)) - res['MT']
        res['ML'] = num_gt_ids - res['MT'] - res['PT']
        res['Frag'] = np.sum(np.subtract(gt_frag_count[gt_frag_count > 0], 1))
        res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])

        res['CLR_Frames'] = data['num_timesteps']

        # Calculate final CLEAR scores
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for field in self.summed_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self.summed_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
        """
        res = {}
        for field in self.integer_fields:
            if ignore_empty_classes:
                res[field] = self._combine_sum(
                    {k: v for k, v in all_res.items() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0}, field)
            else:
                res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
        for field in self.float_fields:
            if ignore_empty_classes:
                res[field] = np.mean(
                    [v[field] for v in all_res.values() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    @staticmethod
    def _compute_final_fields(res):
        """Calculate sub-metric ('field') values which only depend on other sub-metric values.
        This function is used both for per-sequence calculation and for combining values across sequences.
        """
        num_gt_ids = res['MT'] + res['ML'] + res['PT']
        res['MTR'] = res['MT'] / np.maximum(1.0, num_gt_ids)
        res['MLR'] = res['ML'] / np.maximum(1.0, num_gt_ids)
        res['PTR'] = res['PT'] / np.maximum(1.0, num_gt_ids)
        res['CLR_Re'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
        res['CLR_Pr'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FP'])
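        # MOTA = 1 - (FN + FP + IDSW) / num_gt_dets, written below over TP + FN = num_gt_dets;
        # sMOTA replaces the TP count with MOTP_sum, discounting each TP by its localisation error.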
        res['MODA'] = (res['CLR_TP'] - res['CLR_FP']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
        res['MOTA'] = (res['CLR_TP'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
        res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
        res['sMOTA'] = (res['MOTP_sum'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])

        res['CLR_F1'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + 0.5*res['CLR_FN'] + 0.5*res['CLR_FP'])
        res['FP_per_frame'] = res['CLR_FP'] / np.maximum(1.0, res['CLR_Frames'])
        safe_log_idsw = np.log10(res['IDSW']) if res['IDSW'] > 0 else res['IDSW']
        res['MOTAL'] = (res['CLR_TP'] - res['CLR_FP'] - safe_log_idsw) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
        return res
44
test/yolov7-tracker/tracker/trackeval/metrics/count.py
Normal file
@@ -0,0 +1,44 @@

from ._base_metric import _BaseMetric
from .. import _timing


class Count(_BaseMetric):
    """Class which simply counts the number of tracker and gt detections and ids."""
    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['Dets', 'GT_Dets', 'IDs', 'GT_IDs']
        self.fields = self.integer_fields
        self.summary_fields = self.fields

    @_timing.time
    def eval_sequence(self, data):
        """Returns counts for one sequence"""
        # Get results
        res = {'Dets': data['num_tracker_dets'],
               'GT_Dets': data['num_gt_dets'],
               'IDs': data['num_tracker_ids'],
               'GT_IDs': data['num_gt_ids'],
               'Frames': data['num_timesteps']}
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences by summing the counts"""
        res = {}
        for field in self.integer_fields:
            res[field] = self._combine_sum(all_res, field)
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=None):
        """Combines metrics across all classes by summing the class counts"""
        res = {}
        for field in self.integer_fields:
            res[field] = self._combine_sum(all_res, field)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by summing the detection counts"""
        res = {}
        for field in self.integer_fields:
            res[field] = self._combine_sum(all_res, field)
        return res
203
test/yolov7-tracker/tracker/trackeval/metrics/hota.py
Normal file
@@ -0,0 +1,203 @@

import os
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing


class HOTA(_BaseMetric):
    """Class which implements the HOTA metrics.
    See: https://link.springer.com/article/10.1007/s11263-020-01375-2
    """

    def __init__(self, config=None):
        super().__init__()
        self.plottable = True
        self.array_labels = np.arange(0.05, 0.99, 0.05)
        self.integer_array_fields = ['HOTA_TP', 'HOTA_FN', 'HOTA_FP']
        self.float_array_fields = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA']
        self.float_fields = ['HOTA(0)', 'LocA(0)', 'HOTALocA(0)']
        self.fields = self.float_array_fields + self.integer_array_fields + self.float_fields
        self.summary_fields = self.float_array_fields + self.float_fields

    @_timing.time
    def eval_sequence(self, data):
        """Calculates the HOTA metrics for one sequence"""

        # Initialise results
        res = {}
        for field in self.float_array_fields + self.integer_array_fields:
            res[field] = np.zeros((len(self.array_labels)), dtype=float)
        for field in self.float_fields:
            res[field] = 0

        # Return result quickly if tracker or gt sequence is empty
        if data['num_tracker_dets'] == 0:
            res['HOTA_FN'] = data['num_gt_dets'] * np.ones((len(self.array_labels)), dtype=float)
            res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
            res['LocA(0)'] = 1.0
            return res
        if data['num_gt_dets'] == 0:
            res['HOTA_FP'] = data['num_tracker_dets'] * np.ones((len(self.array_labels)), dtype=float)
            res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
            res['LocA(0)'] = 1.0
            return res

        # Variables counting global association
        potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        gt_id_count = np.zeros((data['num_gt_ids'], 1))
        tracker_id_count = np.zeros((1, data['num_tracker_ids']))

        # First loop through each timestep and accumulate global track information.
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Count the potential matches between ids in each timestep
            # These are normalised, weighted by the match similarity.
            similarity = data['similarity_scores'][t]
            sim_iou_denom = similarity.sum(0)[np.newaxis, :] + similarity.sum(1)[:, np.newaxis] - similarity
            sim_iou = np.zeros_like(similarity)
            sim_iou_mask = sim_iou_denom > 0 + np.finfo('float').eps
            sim_iou[sim_iou_mask] = similarity[sim_iou_mask] / sim_iou_denom[sim_iou_mask]
            potential_matches_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += sim_iou

            # Calculate the total number of dets for each gt_id and tracker_id.
            gt_id_count[gt_ids_t] += 1
            tracker_id_count[0, tracker_ids_t] += 1

        # Calculate overall jaccard alignment score (before unique matching) between IDs
        global_alignment_score = potential_matches_count / (gt_id_count + tracker_id_count - potential_matches_count)
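        # global_alignment_score is a track-level Jaccard index: the similarity-weighted
        # co-occurrence of each (gt_id, tracker_id) pair divided by the union of their
        # detections. It biases the per-timestep matching below towards consistent pairs.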
        matches_counts = [np.zeros_like(potential_matches_count) for _ in self.array_labels]

        # Calculate scores for each timestep
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Deal with the case that there are no gt_det/tracker_det in a timestep.
            if len(gt_ids_t) == 0:
                for a, alpha in enumerate(self.array_labels):
                    res['HOTA_FP'][a] += len(tracker_ids_t)
                continue
            if len(tracker_ids_t) == 0:
                for a, alpha in enumerate(self.array_labels):
                    res['HOTA_FN'][a] += len(gt_ids_t)
                continue

            # Get matching scores between pairs of dets for optimizing HOTA
            similarity = data['similarity_scores'][t]
            score_mat = global_alignment_score[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] * similarity

            # Hungarian algorithm to find best matches
            match_rows, match_cols = linear_sum_assignment(-score_mat)

            # Calculate and accumulate basic statistics
            for a, alpha in enumerate(self.array_labels):
                actually_matched_mask = similarity[match_rows, match_cols] >= alpha - np.finfo('float').eps
                alpha_match_rows = match_rows[actually_matched_mask]
                alpha_match_cols = match_cols[actually_matched_mask]
                num_matches = len(alpha_match_rows)
                res['HOTA_TP'][a] += num_matches
                res['HOTA_FN'][a] += len(gt_ids_t) - num_matches
                res['HOTA_FP'][a] += len(tracker_ids_t) - num_matches
                if num_matches > 0:
                    res['LocA'][a] += sum(similarity[alpha_match_rows, alpha_match_cols])
                    matches_counts[a][gt_ids_t[alpha_match_rows], tracker_ids_t[alpha_match_cols]] += 1

        # Calculate association scores (AssA, AssRe, AssPr) for each alpha value.
        # First calculate scores per gt_id/tracker_id combo and then average over the number of detections.
        for a, alpha in enumerate(self.array_labels):
            matches_count = matches_counts[a]
            ass_a = matches_count / np.maximum(1, gt_id_count + tracker_id_count - matches_count)
            res['AssA'][a] = np.sum(matches_count * ass_a) / np.maximum(1, res['HOTA_TP'][a])
            ass_re = matches_count / np.maximum(1, gt_id_count)
            res['AssRe'][a] = np.sum(matches_count * ass_re) / np.maximum(1, res['HOTA_TP'][a])
            ass_pr = matches_count / np.maximum(1, tracker_id_count)
            res['AssPr'][a] = np.sum(matches_count * ass_pr) / np.maximum(1, res['HOTA_TP'][a])

        # Calculate final scores
        res['LocA'] = np.maximum(1e-10, res['LocA']) / np.maximum(1e-10, res['HOTA_TP'])
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for field in self.integer_array_fields:
            res[field] = self._combine_sum(all_res, field)
        for field in ['AssRe', 'AssPr', 'AssA']:
            res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
        loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
        res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
        res = self._compute_final_fields(res)
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
        """
        res = {}
        for field in self.integer_array_fields:
            if ignore_empty_classes:
                res[field] = self._combine_sum(
                    {k: v for k, v in all_res.items()
                     if (v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()}, field)
            else:
                res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)

        for field in self.float_fields + self.float_array_fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values() if
                                      (v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()],
                                     axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self.integer_array_fields:
            res[field] = self._combine_sum(all_res, field)
        for field in ['AssRe', 'AssPr', 'AssA']:
            res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
        loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
        res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
        res = self._compute_final_fields(res)
        return res

    @staticmethod
    def _compute_final_fields(res):
        """Calculate sub-metric ('field') values which only depend on other sub-metric values.
        This function is used both for per-sequence calculation and for combining values across sequences.
        """
        res['DetRe'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'])
        res['DetPr'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FP'])
        res['DetA'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'] + res['HOTA_FP'])
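        # HOTA(alpha) = sqrt(DetA * AssA); OWTA substitutes DetRe for DetA, so it is
        # unaffected by false positives.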
        res['HOTA'] = np.sqrt(res['DetA'] * res['AssA'])
        res['OWTA'] = np.sqrt(res['DetRe'] * res['AssA'])

        res['HOTA(0)'] = res['HOTA'][0]
        res['LocA(0)'] = res['LocA'][0]
        res['HOTALocA(0)'] = res['HOTA(0)'] * res['LocA(0)']
        return res

    def plot_single_tracker_results(self, table_res, tracker, cls, output_folder):
        """Create plot of results"""

        # Only loaded when run to reduce minimum requirements
        from matplotlib import pyplot as plt

        res = table_res['COMBINED_SEQ']
        styles_to_plot = ['r', 'b', 'g', 'b--', 'b:', 'g--', 'g:', 'm']
        for name, style in zip(self.float_array_fields, styles_to_plot):
            plt.plot(self.array_labels, res[name], style)
        plt.xlabel('alpha')
        plt.ylabel('score')
        plt.title(tracker + ' - ' + cls)
        plt.axis([0, 1, 0, 1])
        legend = []
        for name in self.float_array_fields:
            legend += [name + ' (' + str(np.round(np.mean(res[name]), 2)) + ')']
        plt.legend(legend, loc='lower left')
        out_file = os.path.join(output_folder, cls + '_plot.pdf')
        os.makedirs(os.path.dirname(out_file), exist_ok=True)
        plt.savefig(out_file)
        plt.savefig(out_file.replace('.pdf', '.png'))
        plt.clf()
135
test/yolov7-tracker/tracker/trackeval/metrics/identity.py
Normal file
@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils


class Identity(_BaseMetric):
    """Class which implements the ID metrics"""

    @staticmethod
    def get_default_config():
        """Default class config values"""
        default_config = {
            'THRESHOLD': 0.5,  # Similarity score threshold required for an IDTP match. Default 0.5.
            'PRINT_CONFIG': True,  # Whether to print the config information on init. Default: False.
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['IDTP', 'IDFN', 'IDFP']
        self.float_fields = ['IDF1', 'IDR', 'IDP']
        self.fields = self.float_fields + self.integer_fields
        self.summary_fields = self.fields

        # Configuration options:
        self.config = utils.init_config(config, self.get_default_config(), self.get_name())
        self.threshold = float(self.config['THRESHOLD'])

    @_timing.time
    def eval_sequence(self, data):
        """Calculates ID metrics for one sequence"""
        # Initialise results
        res = {}
        for field in self.fields:
            res[field] = 0

        # Return result quickly if tracker or gt sequence is empty
        if data['num_tracker_dets'] == 0:
            res['IDFN'] = data['num_gt_dets']
            return res
        if data['num_gt_dets'] == 0:
            res['IDFP'] = data['num_tracker_dets']
            return res

        # Variables counting global association
        potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        gt_id_count = np.zeros(data['num_gt_ids'])
        tracker_id_count = np.zeros(data['num_tracker_ids'])

        # First loop through each timestep and accumulate global track information.
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Count the potential matches between ids in each timestep
            matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
            match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
            potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1

            # Calculate the total number of dets for each gt_id and tracker_id.
            gt_id_count[gt_ids_t] += 1
            tracker_id_count[tracker_ids_t] += 1

        # Calculate optimal assignment cost matrix for ID metrics
        num_gt_ids = data['num_gt_ids']
        num_tracker_ids = data['num_tracker_ids']
        fp_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
        fn_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
        fp_mat[num_gt_ids:, :num_tracker_ids] = 1e10
        fn_mat[:num_gt_ids, num_tracker_ids:] = 1e10
        for gt_id in range(num_gt_ids):
            fn_mat[gt_id, :num_tracker_ids] = gt_id_count[gt_id]
            fn_mat[gt_id, num_tracker_ids + gt_id] = gt_id_count[gt_id]
        for tracker_id in range(num_tracker_ids):
            fp_mat[:num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
            fp_mat[tracker_id + num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
        fn_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count
        fp_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count

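        # The matrices above are padded to (num_gt_ids + num_tracker_ids) square: each real
        # id gets one dummy partner meaning "unmatched" (costing all of its detections as
        # IDFN/IDFP), while the 1e10 entries forbid pairing with any other dummy.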
        # Hungarian algorithm
        match_rows, match_cols = linear_sum_assignment(fn_mat + fp_mat)

        # Accumulate basic statistics
        res['IDFN'] = fn_mat[match_rows, match_cols].sum().astype(int)
        res['IDFP'] = fp_mat[match_rows, match_cols].sum().astype(int)
        res['IDTP'] = (gt_id_count.sum() - res['IDFN']).astype(int)

        # Calculate final ID scores
        res = self._compute_final_fields(res)
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
        """
        res = {}
        for field in self.integer_fields:
            if ignore_empty_classes:
                res[field] = self._combine_sum({k: v for k, v in all_res.items()
                                                if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps},
                                               field)
            else:
                res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
        for field in self.float_fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values()
                                      if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self.integer_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for field in self.integer_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    @staticmethod
    def _compute_final_fields(res):
        """Calculate sub-metric ('field') values which only depend on other sub-metric values.
        This function is used both for per-sequence calculation and for combining values across sequences.
        """
        res['IDR'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFN'])
        res['IDP'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFP'])
        res['IDF1'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + 0.5 * res['IDFP'] + 0.5 * res['IDFN'])
        return res
135
test/yolov7-tracker/tracker/trackeval/metrics/ideucl.py
Normal file
@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from collections import defaultdict
from .. import utils


class IDEucl(_BaseMetric):
    """Class which implements the IDEucl metric"""

    @staticmethod
    def get_default_config():
        """Default class config values"""
        default_config = {
            'THRESHOLD': 0.4,  # Similarity score threshold required for an IDTP match. 0.4 for IDEucl.
            'PRINT_CONFIG': True,  # Whether to print the config information on init. Default: False.
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        self.fields = ['IDEucl']
        self.float_fields = self.fields
        self.summary_fields = self.fields

        # Configuration options:
        self.config = utils.init_config(config, self.get_default_config(), self.get_name())
        self.threshold = float(self.config['THRESHOLD'])

    @_timing.time
    def eval_sequence(self, data):
        """Calculates IDEucl metrics for all frames"""
        # Initialise results
        res = {'IDEucl': 0}

        # Return result quickly if tracker or gt sequence is empty
        if data['num_tracker_dets'] == 0 or data['num_gt_dets'] == 0.:
            return res

        data['centroid'] = []
        for t, gt_det in enumerate(data['gt_dets']):
            data['centroid'].append(self._compute_centroid(gt_det))

        oid_hid_cent = defaultdict(list)
        oid_cent = defaultdict(list)
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)

            # the order of ids and boxes is assumed to be maintained in `data`
            for ind, gid in enumerate(gt_ids_t):
                oid_cent[gid].append(data['centroid'][t][ind])

            match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
            for m_gid, m_tid in zip(match_idx_gt, match_idx_tracker):
                oid_hid_cent[gt_ids_t[m_gid], tracker_ids_t[m_tid]].append(data['centroid'][t][m_gid])

        oid_hid_dist = {k: np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_hid_cent.items()}
        oid_dist = {int(k): np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_cent.items()}

        unique_oid = np.unique([i[0] for i in oid_hid_dist.keys()]).tolist()
        unique_hid = np.unique([i[1] for i in oid_hid_dist.keys()]).tolist()
        o_len = len(unique_oid)
        h_len = len(unique_hid)
        dist_matrix = np.zeros((o_len, h_len))
        for ((oid, hid), dist) in oid_hid_dist.items():
            oid_ind = unique_oid.index(oid)
            hid_ind = unique_hid.index(hid)
            dist_matrix[oid_ind, hid_ind] = dist

        # opt_hyp_dist contains GT ID : max dist covered by track
        opt_hyp_dist = dict.fromkeys(oid_dist.keys(), 0.)
        cost_matrix = np.max(dist_matrix) - dist_matrix
        rows, cols = linear_sum_assignment(cost_matrix)
        for (row, col) in zip(rows, cols):
            value = dist_matrix[row, col]
            opt_hyp_dist[int(unique_oid[row])] = value

        assert len(opt_hyp_dist.keys()) == len(oid_dist.keys())
        hyp_length = np.sum(list(opt_hyp_dist.values()))
        gt_length = np.sum(list(oid_dist.values()))
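        # Note: id_eucl below is a per-track mean of distance ratios; it is computed but
        # unused, and the reported IDEucl is the ratio of the summed distances instead.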
        id_eucl = np.mean([np.divide(a, b, out=np.zeros_like(a), where=b != 0)
                           for a, b in zip(opt_hyp_dist.values(), oid_dist.values())])
        res['IDEucl'] = np.divide(hyp_length, gt_length, out=np.zeros_like(hyp_length), where=gt_length != 0)
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
        """
        res = {}

        for field in self.float_fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values()
                                      if v['IDEucl'] > 0 + np.finfo('float').eps], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self.float_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res, len(all_res))
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for field in self.float_fields:
            res[field] = self._combine_sum(all_res, field)
        res = self._compute_final_fields(res, len(all_res))
        return res

    @staticmethod
    def _compute_centroid(box):
        box = np.array(box)
        if len(box.shape) == 1:
            centroid = (box[0:2] + box[2:4]) / 2
        else:
            centroid = (box[:, 0:2] + box[:, 2:4]) / 2
        return np.flip(centroid, axis=1)

    @staticmethod
    def _compute_final_fields(res, res_len):
        """
        Exists only to match the signature of the original Identity class.
        """
        return {k: v / res_len for k, v in res.items()}
310
test/yolov7-tracker/tracker/trackeval/metrics/j_and_f.py
Normal file
@@ -0,0 +1,310 @@

import numpy as np
import math
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_metric import _BaseMetric
from .. import _timing


class JAndF(_BaseMetric):
    """Class which implements the J&F metrics"""
    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['num_gt_tracks']
        self.float_fields = ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay', 'J&F']
        self.fields = self.float_fields + self.integer_fields
        self.summary_fields = self.float_fields
        self.optim_type = 'J'  # possible values J, J&F

    @_timing.time
    def eval_sequence(self, data):
        """Returns J&F metrics for one sequence"""

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        num_timesteps = data['num_timesteps']
        num_tracker_ids = data['num_tracker_ids']
        num_gt_ids = data['num_gt_ids']
        gt_dets = data['gt_dets']
        tracker_dets = data['tracker_dets']
        gt_ids = data['gt_ids']
        tracker_ids = data['tracker_ids']

        # get shape of frames
        frame_shape = None
        if num_gt_ids > 0:
            for t in range(num_timesteps):
                if len(gt_ids[t]) > 0:
                    frame_shape = gt_dets[t][0]['size']
                    break
        elif num_tracker_ids > 0:
            for t in range(num_timesteps):
                if len(tracker_ids[t]) > 0:
                    frame_shape = tracker_dets[t][0]['size']
                    break

        if frame_shape:
            # append all-zero masks for timesteps in which tracks do not have a detection
            zero_padding = np.zeros(frame_shape, order='F').astype(np.uint8)
            padding_mask = mask_utils.encode(zero_padding)
            for t in range(num_timesteps):
                gt_id_det_mapping = {gt_ids[t][i]: gt_dets[t][i] for i in range(len(gt_ids[t]))}
                gt_dets[t] = [gt_id_det_mapping[index] if index in gt_ids[t] else padding_mask for index
                              in range(num_gt_ids)]
                tracker_id_det_mapping = {tracker_ids[t][i]: tracker_dets[t][i] for i in range(len(tracker_ids[t]))}
                tracker_dets[t] = [tracker_id_det_mapping[index] if index in tracker_ids[t] else padding_mask for index
                                   in range(num_tracker_ids)]
            # also perform zero padding if number of tracker IDs < number of ground truth IDs
            if num_tracker_ids < num_gt_ids:
                diff = num_gt_ids - num_tracker_ids
                for t in range(num_timesteps):
                    tracker_dets[t] = tracker_dets[t] + [padding_mask for _ in range(diff)]
                num_tracker_ids += diff

        j = self._compute_j(gt_dets, tracker_dets, num_gt_ids, num_tracker_ids, num_timesteps)

        # boundary threshold for F computation
        bound_th = 0.008

# perform matching
|
||||
if self.optim_type == 'J&F':
|
||||
f = np.zeros_like(j)
|
||||
for k in range(num_tracker_ids):
|
||||
for i in range(num_gt_ids):
|
||||
f[k, i, :] = self._compute_f(gt_dets, tracker_dets, k, i, bound_th)
|
||||
optim_metrics = (np.mean(j, axis=2) + np.mean(f, axis=2)) / 2
|
||||
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
|
||||
j_m = j[row_ind, col_ind, :]
|
||||
f_m = f[row_ind, col_ind, :]
|
||||
elif self.optim_type == 'J':
|
||||
optim_metrics = np.mean(j, axis=2)
|
||||
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
|
||||
j_m = j[row_ind, col_ind, :]
|
||||
f_m = np.zeros_like(j_m)
|
||||
for i, (tr_ind, gt_ind) in enumerate(zip(row_ind, col_ind)):
|
||||
f_m[i] = self._compute_f(gt_dets, tracker_dets, tr_ind, gt_ind, bound_th)
|
||||
else:
|
||||
raise TrackEvalException('Unsupported optimization type %s for J&F metric.' % self.optim_type)
|
||||
|
||||
# append zeros for false negatives
|
||||
if j_m.shape[0] < data['num_gt_ids']:
|
||||
diff = data['num_gt_ids'] - j_m.shape[0]
|
||||
j_m = np.concatenate((j_m, np.zeros((diff, j_m.shape[1]))), axis=0)
|
||||
f_m = np.concatenate((f_m, np.zeros((diff, f_m.shape[1]))), axis=0)
|
||||
|
||||
# compute the metrics for each ground truth track
|
||||
res = {
|
||||
'J-Mean': [np.nanmean(j_m[i, :]) for i in range(j_m.shape[0])],
|
||||
'J-Recall': [np.nanmean(j_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(j_m.shape[0])],
|
||||
'F-Mean': [np.nanmean(f_m[i, :]) for i in range(f_m.shape[0])],
|
||||
'F-Recall': [np.nanmean(f_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(f_m.shape[0])],
|
||||
'J-Decay': [],
|
||||
'F-Decay': []
|
||||
}
|
||||
n_bins = 4
|
||||
ids = np.round(np.linspace(1, data['num_timesteps'], n_bins + 1) + 1e-10) - 1
|
||||
ids = ids.astype(np.uint8)
|
||||
|
||||
for k in range(j_m.shape[0]):
|
||||
d_bins_j = [j_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
|
||||
res['J-Decay'].append(np.nanmean(d_bins_j[0]) - np.nanmean(d_bins_j[3]))
|
||||
for k in range(f_m.shape[0]):
|
||||
d_bins_f = [f_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
|
||||
res['F-Decay'].append(np.nanmean(d_bins_f[0]) - np.nanmean(d_bins_f[3]))
|
||||
|
||||
# count number of tracks for weighting of the result
|
||||
res['num_gt_tracks'] = len(res['J-Mean'])
|
||||
for field in ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay']:
|
||||
res[field] = np.mean(res[field])
|
||||
res['J&F'] = (res['J-Mean'] + res['F-Mean']) / 2
|
||||
return res
|
||||
|
||||
def combine_sequences(self, all_res):
|
||||
"""Combines metrics across all sequences"""
|
||||
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
|
||||
for field in self.summary_fields:
|
||||
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='num_gt_tracks')
|
||||
return res
|
||||
|
||||
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
|
||||
"""Combines metrics across all classes by averaging over the class values
|
||||
'ignore empty classes' is not yet implemented here.
|
||||
"""
|
||||
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
|
||||
for field in self.float_fields:
|
||||
res[field] = np.mean([v[field] for v in all_res.values()])
|
||||
return res
|
||||
|
||||
def combine_classes_det_averaged(self, all_res):
|
||||
"""Combines metrics across all classes by averaging over the detection values"""
|
||||
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
|
||||
for field in self.float_fields:
|
||||
res[field] = np.mean([v[field] for v in all_res.values()])
|
||||
return res
|
||||
|
||||
@staticmethod
|
||||
def _seg2bmap(seg, width=None, height=None):
|
||||
"""
|
||||
From a segmentation, compute a binary boundary map with 1 pixel wide
|
||||
boundaries. The boundary pixels are offset by 1/2 pixel towards the
|
||||
origin from the actual segment boundary.
|
||||
Arguments:
|
||||
seg : Segments labeled from 1..k.
|
||||
width : Width of desired bmap <= seg.shape[1]
|
||||
height : Height of desired bmap <= seg.shape[0]
|
||||
Returns:
|
||||
bmap (ndarray): Binary boundary map.
|
||||
David Martin <dmartin@eecs.berkeley.edu>
|
||||
January 2003
|
||||
"""
|
||||
|
||||
seg = seg.astype(np.bool)
|
||||
seg[seg > 0] = 1
|
||||
|
||||
assert np.atleast_3d(seg).shape[2] == 1
|
||||
|
||||
width = seg.shape[1] if width is None else width
|
||||
height = seg.shape[0] if height is None else height
|
||||
|
||||
h, w = seg.shape[:2]
|
||||
|
||||
ar1 = float(width) / float(height)
|
||||
ar2 = float(w) / float(h)
|
||||
|
||||
assert not (
|
||||
width > w | height > h | abs(ar1 - ar2) > 0.01
|
||||
), "Can" "t convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
|
||||
|
||||
e = np.zeros_like(seg)
|
||||
s = np.zeros_like(seg)
|
||||
se = np.zeros_like(seg)
|
||||
|
||||
e[:, :-1] = seg[:, 1:]
|
||||
s[:-1, :] = seg[1:, :]
|
||||
se[:-1, :-1] = seg[1:, 1:]
|
||||
|
||||
b = seg ^ e | seg ^ s | seg ^ se
|
||||
b[-1, :] = seg[-1, :] ^ e[-1, :]
|
||||
b[:, -1] = seg[:, -1] ^ s[:, -1]
|
||||
b[-1, -1] = 0
|
||||
|
||||
if w == width and h == height:
|
||||
bmap = b
|
||||
else:
|
||||
bmap = np.zeros((height, width))
|
||||
for x in range(w):
|
||||
for y in range(h):
|
||||
if b[y, x]:
|
||||
j = 1 + math.floor((y - 1) + height / h)
|
||||
i = 1 + math.floor((x - 1) + width / h)
|
||||
bmap[j, i] = 1
|
||||
|
||||
return bmap
|
||||
|
||||
@staticmethod
|
||||
def _compute_f(gt_data, tracker_data, tracker_data_id, gt_id, bound_th):
|
||||
"""
|
||||
Perform F computation for a given gt and a given tracker ID. Adapted from
|
||||
https://github.com/davisvideochallenge/davis2017-evaluation
|
||||
:param gt_data: the encoded gt masks
|
||||
:param tracker_data: the encoded tracker masks
|
||||
:param tracker_data_id: the tracker ID
|
||||
:param gt_id: the ground truth ID
|
||||
:param bound_th: boundary threshold parameter
|
||||
:return: the F value for the given tracker and gt ID
|
||||
"""
|
||||
|
||||
# Only loaded when run to reduce minimum requirements
|
||||
from pycocotools import mask as mask_utils
|
||||
from skimage.morphology import disk
|
||||
import cv2
|
||||
|
||||
f = np.zeros(len(gt_data))
|
||||
|
||||
for t, (gt_masks, tracker_masks) in enumerate(zip(gt_data, tracker_data)):
|
||||
curr_tracker_mask = mask_utils.decode(tracker_masks[tracker_data_id])
|
||||
curr_gt_mask = mask_utils.decode(gt_masks[gt_id])
|
||||
|
||||
bound_pix = bound_th if bound_th >= 1 - np.finfo('float').eps else \
|
||||
np.ceil(bound_th * np.linalg.norm(curr_tracker_mask.shape))
|
||||
|
||||
# Get the pixel boundaries of both masks
|
||||
fg_boundary = JAndF._seg2bmap(curr_tracker_mask)
|
||||
gt_boundary = JAndF._seg2bmap(curr_gt_mask)
|
||||
|
||||
# fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
|
||||
fg_dil = cv2.dilate(fg_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
|
||||
# gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
|
||||
gt_dil = cv2.dilate(gt_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
|
||||
|
||||
# Get the intersection
|
||||
gt_match = gt_boundary * fg_dil
|
||||
fg_match = fg_boundary * gt_dil
|
||||
|
||||
# Area of the intersection
|
||||
n_fg = np.sum(fg_boundary)
|
||||
n_gt = np.sum(gt_boundary)
|
||||
|
||||
# % Compute precision and recall
|
||||
if n_fg == 0 and n_gt > 0:
|
||||
precision = 1
|
||||
recall = 0
|
||||
elif n_fg > 0 and n_gt == 0:
|
||||
precision = 0
|
||||
recall = 1
|
||||
elif n_fg == 0 and n_gt == 0:
|
||||
precision = 1
|
||||
recall = 1
|
||||
else:
|
||||
precision = np.sum(fg_match) / float(n_fg)
|
||||
recall = np.sum(gt_match) / float(n_gt)
|
||||
|
||||
# Compute F measure
|
||||
if precision + recall == 0:
|
||||
f_val = 0
|
||||
else:
|
||||
f_val = 2 * precision * recall / (precision + recall)
|
||||
|
||||
f[t] = f_val
|
||||
|
||||
return f
|
||||
|
||||
@staticmethod
|
||||
def _compute_j(gt_data, tracker_data, num_gt_ids, num_tracker_ids, num_timesteps):
|
||||
"""
|
||||
Computation of J value for all ground truth IDs and all tracker IDs in the given sequence. Adapted from
|
||||
https://github.com/davisvideochallenge/davis2017-evaluation
|
||||
:param gt_data: the ground truth masks
|
||||
:param tracker_data: the tracker masks
|
||||
:param num_gt_ids: the number of ground truth IDs
|
||||
:param num_tracker_ids: the number of tracker IDs
|
||||
:param num_timesteps: the number of timesteps
|
||||
:return: the J values
|
||||
"""
|
||||
|
||||
# Only loaded when run to reduce minimum requirements
|
||||
from pycocotools import mask as mask_utils
|
||||
|
||||
j = np.zeros((num_tracker_ids, num_gt_ids, num_timesteps))
|
||||
|
||||
for t, (time_gt, time_data) in enumerate(zip(gt_data, tracker_data)):
|
||||
# run length encoded masks with pycocotools
|
||||
area_gt = mask_utils.area(time_gt)
|
||||
time_data = list(time_data)
|
||||
area_tr = mask_utils.area(time_data)
|
||||
|
||||
area_tr = np.repeat(area_tr[:, np.newaxis], len(area_gt), axis=1)
|
||||
area_gt = np.repeat(area_gt[np.newaxis, :], len(area_tr), axis=0)
|
||||
|
||||
# mask iou computation with pycocotools
|
||||
ious = np.atleast_2d(mask_utils.iou(time_data, time_gt, [0]*len(time_gt)))
|
||||
# set iou to 1 if both masks are close to 0 (no ground truth and no predicted mask in timestep)
|
||||
ious[np.isclose(area_tr, 0) & np.isclose(area_gt, 0)] = 1
|
||||
assert (ious >= 0 - np.finfo('float').eps).all()
|
||||
assert (ious <= 1 + np.finfo('float').eps).all()
|
||||
|
||||
j[..., t] = ious
|
||||
|
||||
return j
|
||||
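A quick toy illustration of the matching step above (editor's sketch, not part of the commit): with `optim_type == 'J'`, the per-timestep J values are averaged per track pair and the assignment is solved with `linear_sum_assignment`, negated so the total mean J is maximised.

import numpy as np
from scipy.optimize import linear_sum_assignment

# j[k, i, t]: J (mask IoU) of tracker track k vs gt track i at timestep t
j = np.array([[[0.9, 0.8], [0.1, 0.2]],
              [[0.2, 0.1], [0.7, 0.6]]])
optim_metrics = np.mean(j, axis=2)                        # per-pair mean J over time
row_ind, col_ind = linear_sum_assignment(-optim_metrics)  # negate to maximise
print(row_ind, col_ind)  # [0 1] [0 1]: tracker 0 -> gt 0, tracker 1 -> gt 1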
462 test/yolov7-tracker/tracker/trackeval/metrics/track_map.py Normal file
@@ -0,0 +1,462 @@
import numpy as np
from ._base_metric import _BaseMetric
from .. import _timing
from functools import partial
from .. import utils
from ..utils import TrackEvalException


class TrackMAP(_BaseMetric):
    """Class which implements the TrackMAP metrics"""

    @staticmethod
    def get_default_metric_config():
        """Default class config values"""
        default_config = {
            'USE_AREA_RANGES': True,  # whether to evaluate for certain area ranges
            'AREA_RANGES': [[0 ** 2, 32 ** 2],  # additional area range sets for which TrackMAP is evaluated
                            [32 ** 2, 96 ** 2],  # (the all-area range is always included), default values for TAO
                            [96 ** 2, 1e5 ** 2]],  # evaluation
            'AREA_RANGE_LABELS': ["area_s", "area_m", "area_l"],  # the labels for the area ranges
            'USE_TIME_RANGES': True,  # whether to evaluate for certain time ranges (length of tracks)
            'TIME_RANGES': [[0, 3], [3, 10], [10, 1e5]],  # additional time range sets for which TrackMAP is evaluated
            # (the all-time range is always included), default values for TAO evaluation
            'TIME_RANGE_LABELS': ["time_s", "time_m", "time_l"],  # the labels for the time ranges
            'IOU_THRESHOLDS': np.arange(0.5, 0.96, 0.05),  # the IoU thresholds
            'RECALL_THRESHOLDS': np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01) + 1), endpoint=True),
            # recall thresholds at which precision is evaluated
            'MAX_DETECTIONS': 0,  # limit the maximum number of considered tracks per sequence (0 for unlimited)
            'PRINT_CONFIG': True
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        self.config = utils.init_config(config, self.get_default_metric_config(), self.get_name())

        self.num_ig_masks = 1
        self.lbls = ['all']
        self.use_area_rngs = self.config['USE_AREA_RANGES']
        if self.use_area_rngs:
            self.area_rngs = self.config['AREA_RANGES']
            self.area_rng_lbls = self.config['AREA_RANGE_LABELS']
            self.num_ig_masks += len(self.area_rng_lbls)
            self.lbls += self.area_rng_lbls

        self.use_time_rngs = self.config['USE_TIME_RANGES']
        if self.use_time_rngs:
            self.time_rngs = self.config['TIME_RANGES']
            self.time_rng_lbls = self.config['TIME_RANGE_LABELS']
            self.num_ig_masks += len(self.time_rng_lbls)
            self.lbls += self.time_rng_lbls

        self.array_labels = self.config['IOU_THRESHOLDS']
        self.rec_thrs = self.config['RECALL_THRESHOLDS']

        self.maxDet = self.config['MAX_DETECTIONS']
        self.float_array_fields = ['AP_' + lbl for lbl in self.lbls] + ['AR_' + lbl for lbl in self.lbls]
        self.fields = self.float_array_fields
        self.summary_fields = self.float_array_fields

    @_timing.time
    def eval_sequence(self, data):
        """Calculates GT and Tracker matches for one sequence for TrackMAP metrics. Adapted from
        https://github.com/TAO-Dataset/"""

        # Initialise results to zero for each sequence as the fields are only defined over the set of all sequences
        res = {}
        for field in self.fields:
            res[field] = [0 for _ in self.array_labels]

        gt_ids, dt_ids = data['gt_track_ids'], data['dt_track_ids']

        if len(gt_ids) == 0 and len(dt_ids) == 0:
            for idx in range(self.num_ig_masks):
                res[idx] = None
            return res

        # get track data
        gt_tr_areas = data.get('gt_track_areas', None) if self.use_area_rngs else None
        gt_tr_lengths = data.get('gt_track_lengths', None) if self.use_time_rngs else None
        gt_tr_iscrowd = data.get('gt_track_iscrowd', None)
        dt_tr_areas = data.get('dt_track_areas', None) if self.use_area_rngs else None
        dt_tr_lengths = data.get('dt_track_lengths', None) if self.use_time_rngs else None
        is_nel = data.get('not_exhaustively_labeled', False)

        # compute ignore masks for different track sets to eval
        gt_ig_masks = self._compute_track_ig_masks(len(gt_ids), track_lengths=gt_tr_lengths, track_areas=gt_tr_areas,
                                                   iscrowd=gt_tr_iscrowd)
        dt_ig_masks = self._compute_track_ig_masks(len(dt_ids), track_lengths=dt_tr_lengths, track_areas=dt_tr_areas,
                                                   is_not_exhaustively_labeled=is_nel, is_gt=False)

        boxformat = data.get('boxformat', 'xywh')
        ious = self._compute_track_ious(data['dt_tracks'], data['gt_tracks'], iou_function=data['iou_type'],
                                        boxformat=boxformat)

        for mask_idx in range(self.num_ig_masks):
            gt_ig_mask = gt_ig_masks[mask_idx]

            # Sort gt ignore last
            gt_idx = np.argsort([g for g in gt_ig_mask], kind="mergesort")
            gt_ids = [gt_ids[i] for i in gt_idx]

            ious_sorted = ious[:, gt_idx] if len(ious) > 0 else ious

            num_thrs = len(self.array_labels)
            num_gt = len(gt_ids)
            num_dt = len(dt_ids)

            # Array to store the "id" of the matched dt/gt
            gt_m = np.zeros((num_thrs, num_gt)) - 1
            dt_m = np.zeros((num_thrs, num_dt)) - 1

            gt_ig = np.array([gt_ig_mask[idx] for idx in gt_idx])
            dt_ig = np.zeros((num_thrs, num_dt))

            for iou_thr_idx, iou_thr in enumerate(self.array_labels):
                if len(ious_sorted) == 0:
                    break

                for dt_idx, _dt in enumerate(dt_ids):
                    iou = min([iou_thr, 1 - 1e-10])
                    # information about best match so far (m=-1 -> unmatched)
                    # store the gt_idx which matched for _dt
                    m = -1
                    for gt_idx, _ in enumerate(gt_ids):
                        # if this gt already matched continue
                        if gt_m[iou_thr_idx, gt_idx] > 0:
                            continue
                        # if _dt already matched to a regular gt, and this gt is an ignore gt, stop
                        if m > -1 and gt_ig[m] == 0 and gt_ig[gt_idx] == 1:
                            break
                        # continue to next gt unless a better match is made
                        if ious_sorted[dt_idx, gt_idx] < iou - np.finfo('float').eps:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = ious_sorted[dt_idx, gt_idx]
                        m = gt_idx

                    # No match found for _dt, go to next _dt
                    if m == -1:
                        continue

                    # if gt is to be ignored for some reason, update dt_ig.
                    # Should not be used in evaluation.
                    dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
                    # _dt match found, update gt_m and dt_m with "id"
                    dt_m[iou_thr_idx, dt_idx] = gt_ids[m]
                    gt_m[iou_thr_idx, m] = _dt

            dt_ig_mask = dt_ig_masks[mask_idx]

            dt_ig_mask = np.array(dt_ig_mask).reshape((1, num_dt))  # 1 x num_dt
            dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0)  # num_thrs x num_dt

            # Based on dt_ig_mask, ignore any unmatched detection by updating dt_ig
            dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == -1, dt_ig_mask))
            # store results for given video and category
            res[mask_idx] = {
                "dt_ids": dt_ids,
                "gt_ids": gt_ids,
                "dt_matches": dt_m,
                "gt_matches": gt_m,
                "dt_scores": data['dt_track_scores'],
                "gt_ignore": gt_ig,
                "dt_ignore": dt_ig,
            }

        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences. Computes precision and recall values based on track matches.
        Adapted from https://github.com/TAO-Dataset/
        """
        num_thrs = len(self.array_labels)
        num_recalls = len(self.rec_thrs)

        # -1 for absent categories
        precision = -np.ones(
            (num_thrs, num_recalls, self.num_ig_masks)
        )
        recall = -np.ones((num_thrs, self.num_ig_masks))

        for ig_idx in range(self.num_ig_masks):
            # Remove elements which are None
            ig_idx_results = [res[ig_idx] for res in all_res.values() if res[ig_idx] is not None]

            if len(ig_idx_results) == 0:
                continue

            # Append all scores: shape (N,)
            # limit considered tracks for each sequence if maxDet > 0
            if self.maxDet == 0:
                dt_scores = np.concatenate([res["dt_scores"] for res in ig_idx_results], axis=0)

                dt_idx = np.argsort(-dt_scores, kind="mergesort")

                dt_m = np.concatenate([e["dt_matches"] for e in ig_idx_results],
                                      axis=1)[:, dt_idx]
                dt_ig = np.concatenate([e["dt_ignore"] for e in ig_idx_results],
                                       axis=1)[:, dt_idx]
            elif self.maxDet > 0:
                dt_scores = np.concatenate([res["dt_scores"][0:self.maxDet] for res in ig_idx_results], axis=0)

                dt_idx = np.argsort(-dt_scores, kind="mergesort")

                dt_m = np.concatenate([e["dt_matches"][:, 0:self.maxDet] for e in ig_idx_results],
                                      axis=1)[:, dt_idx]
                dt_ig = np.concatenate([e["dt_ignore"][:, 0:self.maxDet] for e in ig_idx_results],
                                       axis=1)[:, dt_idx]
            else:
                raise Exception("Number of maximum detections must be >= 0, but is set to %i" % self.maxDet)

            gt_ig = np.concatenate([res["gt_ignore"] for res in ig_idx_results])
            # num gt anns to consider
            num_gt = np.count_nonzero(gt_ig == 0)

            if num_gt == 0:
                continue

            tps = np.logical_and(dt_m != -1, np.logical_not(dt_ig))
            fps = np.logical_and(dt_m == -1, np.logical_not(dt_ig))

            # np.float was removed in recent NumPy; plain float is equivalent
            tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
            fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)

            for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                tp = np.array(tp)
                fp = np.array(fp)
                num_tp = len(tp)
                rc = tp / num_gt
                if num_tp:
                    recall[iou_thr_idx, ig_idx] = rc[-1]
                else:
                    recall[iou_thr_idx, ig_idx] = 0

                # np.spacing(1) ~= eps
                pr = tp / (fp + tp + np.spacing(1))
                pr = pr.tolist()

                # Ensure precision values are monotonically decreasing
                for i in range(num_tp - 1, 0, -1):
                    if pr[i] > pr[i - 1]:
                        pr[i - 1] = pr[i]

                # find indices at the predefined recall values
                rec_thrs_insert_idx = np.searchsorted(rc, self.rec_thrs, side="left")

                pr_at_recall = [0.0] * num_recalls

                try:
                    for _idx, pr_idx in enumerate(rec_thrs_insert_idx):
                        pr_at_recall[_idx] = pr[pr_idx]
                except IndexError:
                    pass

                precision[iou_thr_idx, :, ig_idx] = np.array(pr_at_recall)

        res = {'precision': precision, 'recall': recall}

        # compute the precision and recall averages for the respective alpha thresholds and ignore masks
        for lbl in self.lbls:
            res['AP_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
            res['AR_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)

        for a_id, alpha in enumerate(self.array_labels):
            for lbl_idx, lbl in enumerate(self.lbls):
                p = precision[a_id, :, lbl_idx]
                if len(p[p > -1]) == 0:
                    mean_p = -1
                else:
                    mean_p = np.mean(p[p > -1])
                res['AP_' + lbl][a_id] = mean_p
                res['AR_' + lbl][a_id] = recall[a_id, lbl_idx]

        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values.
        Note that mAP is not well defined for 'empty classes', so 'ignore_empty_classes' is always True here.
        """
        res = {}
        for field in self.fields:
            res[field] = np.zeros((len(self.array_labels)), dtype=float)
            # loop variable renamed from the original 'res' to avoid shadowing the result dict
            field_stacked = np.array([class_res[field] for class_res in all_res.values()])

            for a_id, alpha in enumerate(self.array_labels):
                values = field_stacked[:, a_id]
                if len(values[values > -1]) == 0:
                    mean = -1
                else:
                    mean = np.mean(values[values > -1])
                res[field][a_id] = mean
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""

        res = {}
        for field in self.fields:
            res[field] = np.zeros((len(self.array_labels)), dtype=float)
            field_stacked = np.array([class_res[field] for class_res in all_res.values()])

            for a_id, alpha in enumerate(self.array_labels):
                values = field_stacked[:, a_id]
                if len(values[values > -1]) == 0:
                    mean = -1
                else:
                    mean = np.mean(values[values > -1])
                res[field][a_id] = mean
        return res

    def _compute_track_ig_masks(self, num_ids, track_lengths=None, track_areas=None, iscrowd=None,
                                is_not_exhaustively_labeled=False, is_gt=True):
        """
        Computes ignore masks for different track sets to evaluate
        :param num_ids: the number of track IDs
        :param track_lengths: the lengths of the tracks (number of timesteps)
        :param track_areas: the average area of a track
        :param iscrowd: whether a track is marked as crowd
        :param is_not_exhaustively_labeled: whether the track category is not exhaustively labeled
        :param is_gt: whether it is gt
        :return: the track ignore masks
        """
        # for TAO, tracks for classes which are not exhaustively labeled are not evaluated
        if not is_gt and is_not_exhaustively_labeled:
            track_ig_masks = [[1 for _ in range(num_ids)] for i in range(self.num_ig_masks)]
        else:
            # consider all tracks
            track_ig_masks = [[0 for _ in range(num_ids)]]

            # consider tracks with certain area
            if self.use_area_rngs:
                for rng in self.area_rngs:
                    track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= area <= rng[1] + np.finfo('float').eps
                                           else 1 for area in track_areas])

            # consider tracks with certain duration
            if self.use_time_rngs:
                for rng in self.time_rngs:
                    track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= length
                                           <= rng[1] + np.finfo('float').eps else 1 for length in track_lengths])

        # for YouTubeVIS evaluation, tracks with crowd tag are not evaluated
        if is_gt and iscrowd:
            track_ig_masks = [np.logical_or(mask, iscrowd) for mask in track_ig_masks]

        return track_ig_masks

    @staticmethod
    def _compute_bb_track_iou(dt_track, gt_track, boxformat='xywh'):
        """
        Calculates the track IoU for one detected track and one ground truth track for bounding boxes
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                         numpy arrays as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                         numpy array as values)
        :param boxformat: the format of the boxes
        :return: the track IoU
        """
        intersect = 0
        union = 0
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)
            if boxformat == 'xywh':
                if d is not None and g is not None:
                    dx, dy, dw, dh = d
                    gx, gy, gw, gh = g
                    w = max(min(dx + dw, gx + gw) - max(dx, gx), 0)
                    h = max(min(dy + dh, gy + gh) - max(dy, gy), 0)
                    i = w * h
                    u = dw * dh + gw * gh - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += g[2] * g[3]
                elif d is not None and g is None:
                    union += d[2] * d[3]
            elif boxformat == 'x0y0x1y1':
                if d is not None and g is not None:
                    dx0, dy0, dx1, dy1 = d
                    gx0, gy0, gx1, gy1 = g
                    w = max(min(dx1, gx1) - max(dx0, gx0), 0)
                    h = max(min(dy1, gy1) - max(dy0, gy0), 0)
                    i = w * h
                    u = (dx1 - dx0) * (dy1 - dy0) + (gx1 - gx0) * (gy1 - gy0) - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += (g[2] - g[0]) * (g[3] - g[1])
                elif d is not None and g is None:
                    union += (d[2] - d[0]) * (d[3] - d[1])
            else:
                raise TrackEvalException('BoxFormat not implemented')
        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the box values corrupted?")
        return intersect / union if union > 0 else 0

    @staticmethod
    def _compute_mask_track_iou(dt_track, gt_track):
        """
        Calculates the track IoU for one detected track and one ground truth track for segmentation masks
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                         pycocotools rle encoded masks as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                         pycocotools rle encoded masks as values)
        :return: the track IoU
        """
        # only loaded when needed to reduce minimum requirements
        from pycocotools import mask as mask_utils

        intersect = .0
        union = .0
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)
            if d and g:
                intersect += mask_utils.area(mask_utils.merge([d, g], True))
                union += mask_utils.area(mask_utils.merge([d, g], False))
            elif not d and g:
                union += mask_utils.area(g)
            elif d and not g:
                union += mask_utils.area(d)
        if union < 0.0 - np.finfo('float').eps:
            raise TrackEvalException("Union value < 0. Are the segmentations corrupted?")
        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the segmentations corrupted?")
        iou = intersect / union if union > 0.0 + np.finfo('float').eps else 0.0
        return iou

    @staticmethod
    def _compute_track_ious(dt, gt, iou_function='bbox', boxformat='xywh'):
        """
        Calculate track IoUs for a set of ground truth tracks and a set of detected tracks
        """

        if len(gt) == 0 and len(dt) == 0:
            return []

        if iou_function == 'bbox':
            track_iou_function = partial(TrackMAP._compute_bb_track_iou, boxformat=boxformat)
        elif iou_function == 'mask':
            track_iou_function = partial(TrackMAP._compute_mask_track_iou)
        else:
            raise Exception('IoU function not implemented')

        ious = np.zeros([len(dt), len(gt)])
        for i, j in np.ndindex(ious.shape):
            ious[i, j] = track_iou_function(dt[i], gt[j])
        return ious

    @staticmethod
    def _row_print(*argv):
        """Prints results in evenly spaced rows, with more space in the first row"""
        if len(argv) == 1:
            argv = argv[0]
        to_print = '%-40s' % argv[0]
        for v in argv[1:]:
            to_print += '%-12s' % str(v)
        print(to_print)
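A toy walk-through of the COCO/TAO-style AP interpolation in combine_sequences above (editor's sketch, not part of the commit): precision is first made monotonically non-increasing from right to left, then sampled at the fixed recall thresholds via np.searchsorted; recall values beyond the curve get precision 0, mirroring the try/except IndexError in the code.

import numpy as np

rc = np.array([0.2, 0.4, 0.6, 0.8])   # cumulative recall after each detection
pr = [1.0, 0.5, 0.75, 0.6]            # raw precision (not monotone)
for i in range(len(pr) - 1, 0, -1):   # right-to-left max, as in the code
    if pr[i] > pr[i - 1]:
        pr[i - 1] = pr[i]
# pr is now [1.0, 0.75, 0.75, 0.6]
rec_thrs = np.array([0.0, 0.5, 1.0])
idx = np.searchsorted(rc, rec_thrs, side="left")           # -> [0, 2, 4]
pr_at_recall = [pr[i] if i < len(pr) else 0.0 for i in idx]
print(pr_at_recall)                                        # [1.0, 0.75, 0.0]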
131 test/yolov7-tracker/tracker/trackeval/metrics/vace.py Normal file
@@ -0,0 +1,131 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing


class VACE(_BaseMetric):
    """Class which implements the VACE metrics.

    The metrics are described in:
    Manohar et al. (2006) "Performance Evaluation of Object Detection and Tracking in Video"
    https://link.springer.com/chapter/10.1007/11612704_16

    This implementation uses the "relaxed" variant of the metrics,
    where an overlap threshold is applied in each frame.
    """

    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['VACE_IDs', 'VACE_GT_IDs', 'num_non_empty_timesteps']
        self.float_fields = ['STDA', 'ATA', 'FDA', 'SFDA']
        self.fields = self.integer_fields + self.float_fields
        self.summary_fields = ['SFDA', 'ATA']

        # Fields that are accumulated over multiple videos.
        self._additive_fields = self.integer_fields + ['STDA', 'FDA']

        self.threshold = 0.5

    @_timing.time
    def eval_sequence(self, data):
        """Calculates VACE metrics for one sequence.

        Depends on the fields:
            data['num_gt_ids']
            data['num_tracker_ids']
            data['gt_ids']
            data['tracker_ids']
            data['similarity_scores']
        """
        res = {}

        # Obtain Average Tracking Accuracy (ATA) using track correspondence.
        # Obtain counts necessary to compute temporal IOU.
        # Assume that integer counts can be represented exactly as floats.
        potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        gt_id_count = np.zeros(data['num_gt_ids'])
        tracker_id_count = np.zeros(data['num_tracker_ids'])
        both_present_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Count the number of frames in which two tracks satisfy the overlap criterion.
            matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
            match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
            potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
            # Count the number of frames in which the tracks are present.
            gt_id_count[gt_ids_t] += 1
            tracker_id_count[tracker_ids_t] += 1
            both_present_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += 1
        # Number of frames in which either track is present (union of the two sets of frames).
        union_count = (gt_id_count[:, np.newaxis]
                       + tracker_id_count[np.newaxis, :]
                       - both_present_count)
        # The denominator should always be non-zero if all tracks are non-empty.
        with np.errstate(divide='raise', invalid='raise'):
            temporal_iou = potential_matches_count / union_count
        # Find assignment that maximizes temporal IOU.
        match_rows, match_cols = linear_sum_assignment(-temporal_iou)
        res['STDA'] = temporal_iou[match_rows, match_cols].sum()
        res['VACE_IDs'] = data['num_tracker_ids']
        res['VACE_GT_IDs'] = data['num_gt_ids']

        # Obtain Frame Detection Accuracy (FDA) using per-frame correspondence.
        non_empty_count = 0
        fda = 0
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            n_g = len(gt_ids_t)
            n_d = len(tracker_ids_t)
            if not (n_g or n_d):
                continue
            # n_g > 0 or n_d > 0
            non_empty_count += 1
            if not (n_g and n_d):
                continue
            # n_g > 0 and n_d > 0
            spatial_overlap = data['similarity_scores'][t]
            match_rows, match_cols = linear_sum_assignment(-spatial_overlap)
            overlap_ratio = spatial_overlap[match_rows, match_cols].sum()
            fda += overlap_ratio / (0.5 * (n_g + n_d))
        res['FDA'] = fda
        res['num_non_empty_timesteps'] = non_empty_count

        res.update(self._compute_final_fields(res))
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
        """
        res = {}
        for field in self.fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values()
                                      if v['VACE_GT_IDs'] > 0 or v['VACE_IDs'] > 0], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self._additive_fields:
            res[field] = _BaseMetric._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for header in self._additive_fields:
            res[header] = _BaseMetric._combine_sum(all_res, header)
        res.update(self._compute_final_fields(res))
        return res

    @staticmethod
    def _compute_final_fields(additive):
        final = {}
        with np.errstate(invalid='ignore'):  # Permit nan results.
            final['ATA'] = (additive['STDA'] /
                            (0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs'])))
            final['SFDA'] = additive['FDA'] / additive['num_non_empty_timesteps']
        return final
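Toy numbers for the final VACE fields above (editor's sketch, values are illustrative): with STDA = 1.5 accumulated over 2 gt tracks and 3 tracker tracks, ATA = 1.5 / (0.5 * (3 + 2)) = 0.6; with FDA = 4.2 over 6 non-empty frames, SFDA = 4.2 / 6 = 0.7.

additive = {'STDA': 1.5, 'VACE_IDs': 3, 'VACE_GT_IDs': 2,
            'FDA': 4.2, 'num_non_empty_timesteps': 6}
ata = additive['STDA'] / (0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs']))
sfda = additive['FDA'] / additive['num_non_empty_timesteps']
print(ata, sfda)  # 0.6 0.7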
230 test/yolov7-tracker/tracker/trackeval/plotting.py Normal file
@@ -0,0 +1,230 @@
import os
import numpy as np
from .utils import TrackEvalException


def plot_compare_trackers(tracker_folder, tracker_list, cls, output_folder, plots_list=None):
    """Create plots which compare metrics across different trackers."""
    # Define what to plot
    if plots_list is None:
        plots_list = get_default_plots_list()

    # Load data
    data = load_multiple_tracker_summaries(tracker_folder, tracker_list, cls)
    out_loc = os.path.join(output_folder, cls)

    # Plot
    for args in plots_list:
        create_comparison_plot(data, out_loc, *args)


def get_default_plots_list():
    # y_label, x_label, sort_label, bg_label, bg_function
    plots_list = [
        ['AssA', 'DetA', 'HOTA', 'HOTA', 'geometric_mean'],
        ['AssPr', 'AssRe', 'HOTA', 'AssA', 'jaccard'],
        ['DetPr', 'DetRe', 'HOTA', 'DetA', 'jaccard'],
        ['HOTA(0)', 'LocA(0)', 'HOTA', 'HOTALocA(0)', 'multiplication'],
        ['HOTA', 'LocA', 'HOTA', None, None],

        ['HOTA', 'MOTA', 'HOTA', None, None],
        ['HOTA', 'IDF1', 'HOTA', None, None],
        ['IDF1', 'MOTA', 'HOTA', None, None],
    ]
    return plots_list


def load_multiple_tracker_summaries(tracker_folder, tracker_list, cls):
    """Loads summary data for multiple trackers."""
    data = {}
    for tracker in tracker_list:
        with open(os.path.join(tracker_folder, tracker, cls + '_summary.txt')) as f:
            keys = next(f).split(' ')
            done = False
            while not done:
                values = next(f).split(' ')
                if len(values) == len(keys):
                    done = True
            data[tracker] = dict(zip(keys, map(float, values)))
    return data


def create_comparison_plot(data, out_loc, y_label, x_label, sort_label, bg_label=None, bg_function=None, settings=None):
    """ Creates a scatter plot comparing multiple trackers between two metric fields, with one on the x-axis and the
    other on the y-axis. Adds pareto optimal lines and (optionally) a background contour.

    Inputs:
        data: dict of dicts such that data[tracker_name][metric_field_name] = float
        y_label: the metric_field_name to be plotted on the y-axis
        x_label: the metric_field_name to be plotted on the x-axis
        sort_label: the metric_field_name by which trackers are ordered and ranked
        bg_label: the metric_field_name by which (optional) background contours are plotted
        bg_function: the (optional) function bg_function(x, y) which converts the x_label / y_label values into bg_label
        settings: dict of plot settings with keys:
            'gap_val': gap between axis ticks and bg curves.
            'num_to_plot': maximum number of trackers to plot
    """

    # Only loaded when run to reduce minimum requirements
    from matplotlib import pyplot as plt

    # Get plot settings
    if settings is None:
        gap_val = 2
        num_to_plot = 20
    else:
        gap_val = settings['gap_val']
        num_to_plot = settings['num_to_plot']

    if (bg_label is None) != (bg_function is None):
        raise TrackEvalException('bg_function and bg_label must either be both given or neither given.')

    # Extract data
    tracker_names = np.array(list(data.keys()))
    sort_index = np.array([data[t][sort_label] for t in tracker_names]).argsort()[::-1]
    x_values = np.array([data[t][x_label] for t in tracker_names])[sort_index][:num_to_plot]
    y_values = np.array([data[t][y_label] for t in tracker_names])[sort_index][:num_to_plot]

    # Print info on what is being plotted
    tracker_names = tracker_names[sort_index][:num_to_plot]
    print('\nPlotting %s vs %s, for the following (ordered) trackers:' % (y_label, x_label))
    for i, name in enumerate(tracker_names):
        print('%i: %s' % (i + 1, name))

    # Find best fitting boundaries for data
    boundaries = _get_boundaries(x_values, y_values, round_val=gap_val / 2)

    fig = plt.figure()

    # Plot background contour
    if bg_function is not None:
        _plot_bg_contour(bg_function, boundaries, gap_val)

    # Plot pareto optimal lines
    _plot_pareto_optimal_lines(x_values, y_values)

    # Plot data points with number labels
    labels = np.arange(len(y_values)) + 1
    plt.plot(x_values, y_values, 'b.', markersize=15)
    for xx, yy, l in zip(x_values, y_values, labels):
        plt.text(xx, yy, str(l), color="red", fontsize=15)

    # Add extra explanatory text to plots
    plt.text(0, -0.11, 'label order:\nHOTA', horizontalalignment='left', verticalalignment='center',
             transform=fig.axes[0].transAxes, color="red", fontsize=12)
    if bg_label is not None:
        plt.text(1, -0.11, 'curve values:\n' + bg_label, horizontalalignment='right', verticalalignment='center',
                 transform=fig.axes[0].transAxes, color="grey", fontsize=12)

    plt.xlabel(x_label, fontsize=15)
    plt.ylabel(y_label, fontsize=15)
    title = y_label + ' vs ' + x_label
    if bg_label is not None:
        title += ' (' + bg_label + ')'
    plt.title(title, fontsize=17)
    plt.xticks(np.arange(0, 100, gap_val))
    plt.yticks(np.arange(0, 100, gap_val))
    min_x, max_x, min_y, max_y = boundaries
    plt.xlim(min_x, max_x)
    plt.ylim(min_y, max_y)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.tight_layout()

    os.makedirs(out_loc, exist_ok=True)
    filename = os.path.join(out_loc, title.replace(' ', '_'))
    plt.savefig(filename + '.pdf', bbox_inches='tight', pad_inches=0.05)
    plt.savefig(filename + '.png', bbox_inches='tight', pad_inches=0.05)


def _get_boundaries(x_values, y_values, round_val):
    x1 = np.min(np.floor((x_values - 0.5) / round_val) * round_val)
    x2 = np.max(np.ceil((x_values + 0.5) / round_val) * round_val)
    y1 = np.min(np.floor((y_values - 0.5) / round_val) * round_val)
    y2 = np.max(np.ceil((y_values + 0.5) / round_val) * round_val)
    x_range = x2 - x1
    y_range = y2 - y1
    max_range = max(x_range, y_range)
    x_center = (x1 + x2) / 2
    y_center = (y1 + y2) / 2
    min_x = max(x_center - max_range / 2, 0)
    max_x = min(x_center + max_range / 2, 100)
    min_y = max(y_center - max_range / 2, 0)
    max_y = min(y_center + max_range / 2, 100)
    return min_x, max_x, min_y, max_y


def geometric_mean(x, y):
    return np.sqrt(x * y)


def jaccard(x, y):
    x = x / 100
    y = y / 100
    return 100 * (x * y) / (x + y - x * y)


def multiplication(x, y):
    return x * y / 100


bg_function_dict = {
    "geometric_mean": geometric_mean,
    "jaccard": jaccard,
    "multiplication": multiplication,
}


def _plot_bg_contour(bg_function, plot_boundaries, gap_val):
    """ Plot background contour. """

    # Only loaded when run to reduce minimum requirements
    from matplotlib import pyplot as plt

    # Plot background contour
    min_x, max_x, min_y, max_y = plot_boundaries
    x = np.arange(min_x, max_x, 0.1)
    y = np.arange(min_y, max_y, 0.1)
    x_grid, y_grid = np.meshgrid(x, y)
    if bg_function in bg_function_dict.keys():
        z_grid = bg_function_dict[bg_function](x_grid, y_grid)
    else:
        raise TrackEvalException("background plotting function '%s' is not defined." % bg_function)
    levels = np.arange(0, 100, gap_val)
    con = plt.contour(x_grid, y_grid, z_grid, levels, colors='grey')

    def bg_format(val):
        s = '{:.1f}'.format(val)  # format spec fixed from the original '{:1f}'
        return '{:.0f}'.format(val) if s[-1] == '0' else s

    con.levels = [bg_format(val) for val in con.levels]
    plt.clabel(con, con.levels, inline=True, fmt='%r', fontsize=8)


def _plot_pareto_optimal_lines(x_values, y_values):
    """ Plot pareto optimal lines """

    # Only loaded when run to reduce minimum requirements
    from matplotlib import pyplot as plt

    # Plot pareto optimal lines
    cxs = x_values
    cys = y_values
    best_y = np.argmax(cys)
    x_pareto = [0, cxs[best_y]]
    y_pareto = [cys[best_y], cys[best_y]]
    t = 2
    remaining = cxs > x_pareto[t - 1]
    cys = cys[remaining]
    cxs = cxs[remaining]
    while len(cxs) > 0 and len(cys) > 0:
        best_y = np.argmax(cys)
        x_pareto += [x_pareto[t - 1], cxs[best_y]]
        y_pareto += [cys[best_y], cys[best_y]]
        t += 2
        remaining = cxs > x_pareto[t - 1]
        cys = cys[remaining]
        cxs = cxs[remaining]
    x_pareto.append(x_pareto[t - 1])
    y_pareto.append(0)
    plt.plot(np.array(x_pareto), np.array(y_pareto), '--r')
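Hypothetical usage of the plotting entry point above (editor's sketch, not part of the commit; the import path and tracker names are assumptions based on the package layout in this repo):

from trackeval.plotting import plot_compare_trackers

plot_compare_trackers(tracker_folder='./tracker/results',
                      tracker_list=['tracker_a', 'tracker_b'],
                      cls='pedestrian',
                      output_folder='./plots')
# expects ./tracker/results/<tracker>/pedestrian_summary.txt for each tracker,
# as written by write_summary_results below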
146 test/yolov7-tracker/tracker/trackeval/utils.py Normal file
@@ -0,0 +1,146 @@
import os
import csv
import argparse
from collections import OrderedDict


def init_config(config, default_config, name=None):
    """Initialise non-given config values with defaults"""
    if config is None:
        config = default_config
    else:
        for k in default_config.keys():
            if k not in config.keys():
                config[k] = default_config[k]
    if name and config['PRINT_CONFIG']:
        print('\n%s Config:' % name)
        for c in config.keys():
            print('%-20s : %-30s' % (c, config[c]))
    return config


def update_config(config):
    """
    Parse the arguments of a script and update the config values for a given key if specified in the arguments.
    :param config: the config to update
    :return: the updated config
    """
    parser = argparse.ArgumentParser()
    for setting in config.keys():
        if isinstance(config[setting], list) or config[setting] is None:
            parser.add_argument("--" + setting, nargs='+')
        else:
            parser.add_argument("--" + setting)
    args = parser.parse_args().__dict__
    for setting in args.keys():
        if args[setting] is not None:
            if type(config[setting]) == type(True):
                if args[setting] == 'True':
                    x = True
                elif args[setting] == 'False':
                    x = False
                else:
                    raise Exception('Command line parameter ' + setting + ' must be True or False')
            elif type(config[setting]) == type(1):
                x = int(args[setting])
            elif type(args[setting]) == type(None):
                x = None
            else:
                x = args[setting]
            config[setting] = x
    return config


def get_code_path():
    """Get the base path where the code is"""
    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))


def validate_metrics_list(metrics_list):
    """Gets the names of the metric classes and ensures they are unique; further checks that the fields within each
    metric class do not have overlapping names.
    """
    metric_names = [metric.get_name() for metric in metrics_list]
    # check metric names are unique
    if len(metric_names) != len(set(metric_names)):
        raise TrackEvalException('Code being run with multiple metrics of the same name')
    fields = []
    for m in metrics_list:
        fields += m.fields
    # check metric fields are unique
    if len(fields) != len(set(fields)):
        raise TrackEvalException('Code being run with multiple metrics with fields of the same name')
    return metric_names


def write_summary_results(summaries, cls, output_folder):
    """Write summary results to file"""

    fields = sum([list(s.keys()) for s in summaries], [])
    values = sum([list(s.values()) for s in summaries], [])

    # In order to remain consistent when new fields are added, each of the following fields (if present) is output
    # in the summary first, in the order below. Any further fields are output in the order each metric family is
    # called, and within each family either in the order they were added to the dict (python >= 3.6) or randomly
    # (python < 3.6).
    default_order = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA', 'HOTA(0)', 'LocA(0)',
                     'HOTALocA(0)', 'MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'CLR_TP', 'CLR_FN',
                     'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag', 'sMOTA', 'IDF1', 'IDR', 'IDP', 'IDTP', 'IDFN', 'IDFP',
                     'Dets', 'GT_Dets', 'IDs', 'GT_IDs']
    default_ordered_dict = OrderedDict(zip(default_order, [None for _ in default_order]))
    for f, v in zip(fields, values):
        default_ordered_dict[f] = v
    for df in default_order:
        if default_ordered_dict[df] is None:
            del default_ordered_dict[df]
    fields = list(default_ordered_dict.keys())
    values = list(default_ordered_dict.values())

    out_file = os.path.join(output_folder, cls + '_summary.txt')
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    with open(out_file, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=' ')
        writer.writerow(fields)
        writer.writerow(values)


def write_detailed_results(details, cls, output_folder):
    """Write detailed results to file"""
    sequences = details[0].keys()
    fields = ['seq'] + sum([list(s['COMBINED_SEQ'].keys()) for s in details], [])
    out_file = os.path.join(output_folder, cls + '_detailed.csv')
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    with open(out_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        for seq in sorted(sequences):
            if seq == 'COMBINED_SEQ':
                continue
            writer.writerow([seq] + sum([list(s[seq].values()) for s in details], []))
        writer.writerow(['COMBINED'] + sum([list(s['COMBINED_SEQ'].values()) for s in details], []))


def load_detail(file):
    """Loads detailed data for a tracker."""
    data = {}
    with open(file) as f:
        for i, row_text in enumerate(f):
            row = row_text.replace('\r', '').replace('\n', '').split(',')
            if i == 0:
                keys = row[1:]
                continue
            current_values = row[1:]
            seq = row[0]
            if seq == 'COMBINED':
                seq = 'COMBINED_SEQ'
            if (len(current_values) == len(keys)) and seq != '':
                data[seq] = {}
                for key, value in zip(keys, current_values):
                    data[seq][key] = float(value)
    return data


class TrackEvalException(Exception):
    """Custom exception for catching expected errors."""
    ...
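A minimal sketch of init_config above (editor's example; keys and values are illustrative): unspecified keys are filled from the defaults, and the merged config is printed when PRINT_CONFIG is set.

default_config = {'PRINT_CONFIG': True, 'SPLIT': 'test', 'GT_FOLDER': ''}
user_config = {'SPLIT': 'train'}
merged = init_config(user_config, default_config, name='Demo')
# merged == {'SPLIT': 'train', 'PRINT_CONFIG': True, 'GT_FOLDER': ''}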
36 test/yolov7-tracker/tracker/tracking_utils/envs.py Normal file
@@ -0,0 +1,36 @@
"""
set gpus and random seeds
"""

import os
import random
import numpy as np
from loguru import logger

import torch
import torch.backends.cudnn as cudnn


def select_device(device):
    """ set device
    Args:
        device: str, 'cpu' or '0' or '1,2,3'-like

    Return:
        torch.device
    """

    if device == 'cpu':
        logger.info('Use CPU for training')

    elif ',' in device:  # multi-gpu
        logger.error('Multi-GPU currently not supported')

    else:
        logger.info(f'set gpu {device}')
        os.environ['CUDA_VISIBLE_DEVICES'] = device
        assert torch.cuda.is_available()

    cuda = device != 'cpu' and torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda else 'cpu')
    return device
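Example use of select_device above (editor's sketch; 'model' is a placeholder for any torch module):

device = select_device('0')   # sets CUDA_VISIBLE_DEVICES='0', returns torch.device('cuda:0')
model = model.to(device)      # move a hypothetical model onto the selected device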
26 test/yolov7-tracker/tracker/tracking_utils/tools.py Normal file
@@ -0,0 +1,26 @@
import numpy as np
import cv2
import os


def save_results(folder_name, seq_name, results, data_type='default'):
    """
    write results to a txt file

    folder_name: result folder name under ./track_results
    seq_name: sequence name, used as the txt file name
    results: list, row format: (frame id, target ids, box coordinates, classes, scores)
    data_type: 'default' | 'mot_challenge', data format to write, default or MOT submission
    """
    assert len(results)

    if not os.path.exists(f'./track_results/{folder_name}'):
        os.makedirs(f'./track_results/{folder_name}')

    with open(os.path.join('./track_results', folder_name, seq_name + '.txt'), 'w') as f:
        for frame_id, target_ids, tlwhs, clses, scores in results:
            for id, tlwh, score in zip(target_ids, tlwhs, scores):
                f.write(f'{frame_id},{id},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{score:.2f},-1,-1,-1\n')
    # the with-block closes the file; the original explicit f.close() was redundant

    return folder_name
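Illustrative call of save_results above (editor's sketch with toy values): each result row is (frame_id, target_ids, tlwhs, clses, scores), and each written line follows the MOT-style 'frame,id,x,y,w,h,score,-1,-1,-1' format.

results = [(1, [5], [(100.0, 50.0, 40.0, 80.0)], [0], [0.93])]
save_results('demo_exp', 'seq01', results)
# -> ./track_results/demo_exp/seq01.txt containing:
# 1,5,100.00,50.00,40.00,80.00,0.93,-1,-1,-1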
64 test/yolov7-tracker/tracker/tracking_utils/visualization.py Normal file
@@ -0,0 +1,64 @@
import cv2
import os
import numpy as np
from PIL import Image


def plot_img(img, frame_id, results, save_dir):
    """
    img: np.ndarray: (H, W, C)
    frame_id: int
    results: [tlwhs, ids, clses]
    save_dir: str

    plot images with bboxes of a seq
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    assert img is not None

    if len(img.shape) > 3:
        img = img.squeeze(0)

    img_ = np.ascontiguousarray(np.copy(img))

    tlwhs, ids, clses = results[0], results[1], results[2]
    for tlwh, id, cls in zip(tlwhs, ids, clses):
        # convert tlwh to tlbr
        tlbr = tuple([int(tlwh[0]), int(tlwh[1]), int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])])
        # draw a rect
        cv2.rectangle(img_, tlbr[:2], tlbr[2:], get_color(id), thickness=3, )
        # note the id and cls
        text = f'{int(cls)}_{id}'
        cv2.putText(img_, text, (tlbr[0], tlbr[1]), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1,
                    color=(255, 164, 0), thickness=2)

    cv2.imwrite(filename=os.path.join(save_dir, f'{frame_id:05d}.jpg'), img=img_)


def get_color(idx):
    """
    aux func for plot_seq
    get a unique color for each id
    """
    idx = idx * 3
    color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)

    return color


def save_video(images_path):
    """
    save images (frames) to a video
    """

    images_list = sorted(os.listdir(images_path))
    save_video_path = os.path.join(images_path, images_path.split('/')[-1] + '.mp4')

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")

    img0 = Image.open(os.path.join(images_path, images_list[0]))
    vw = cv2.VideoWriter(save_video_path, fourcc, 15, img0.size)

    for image_name in images_list:
        image = cv2.imread(filename=os.path.join(images_path, image_name))
        vw.write(image)
    vw.release()  # release the writer so the container is finalised (missing in the original)
16 test/yolov7-tracker/tracker/yolov7_utils/postprocess.py Normal file
@@ -0,0 +1,16 @@
from utils.general import non_max_suppression, scale_coords


def postprocess(out, conf_thresh, nms_thresh, img_size, ori_img_size):
    """
    Args:
        out: raw output from the yolo v7 model
        conf_thresh: confidence threshold for NMS
        nms_thresh: IoU threshold for NMS
        img_size: network input image size
        ori_img_size: original image size
    """

    out = out[0]
    out = non_max_suppression(out, conf_thresh, nms_thresh, )[0]
    out[:, :4] = scale_coords(img_size, out[:, :4], ori_img_size, ratio_pad=None).round()

    # out: tlbr, conf, cls
    return out
@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/airmot/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt

nc: 2

names: ['plane', 'ship']
@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/uavdt/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt

nc: 1

names: ['car']
@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt

nc: 5

names: ['pedestrain', 'car', 'van', 'truck', 'bus']
@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt

nc: 5

names: ['pedestrain', 'car', 'van', 'truck', 'bus']
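These hunks are YOLOv5-style dataset configs: image-list txt paths per split, class count `nc`, and class `names`. A minimal sketch of reading one (the file path is illustrative; the values shown match the uavdt hunk above):

import yaml

with open('./data/UAVDT.yaml') as f:
    data_cfg = yaml.safe_load(f)

print(data_cfg['nc'])     # 1
print(data_cfg['names'])  # ['car']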
6
test/yolov7-tracker/tracker/yolov8_utils/postprocess.py
Normal file
@@ -0,0 +1,6 @@
def postprocess(out):
    """
    extract the detection tensor from an ultralytics Results list;
    each row is (x1, y1, x2, y2, conf, cls)
    """
    out = out[0].boxes
    return out.data
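A hedged usage sketch with the ultralytics API; the weights path and `img` source are illustrative:

from ultralytics import YOLO

model = YOLO('yolov8s.pt')
out = model(img)         # list of Results objects
dets = postprocess(out)  # tensor rows: x1, y1, x2, y2, conf, cls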
36
test/yolov7-tracker/tracker/yolov8_utils/train_yolov8.py
Normal file
@@ -0,0 +1,36 @@
import argparse

from ultralytics import YOLO


def main(args):
    """
    train a YOLO v8 model with the ultralytics API
    """
    model = YOLO(model=args.model_weight)
    model.train(
        data=args.data_cfg,
        epochs=args.epochs,
        batch=args.batch_size,
        imgsz=args.img_sz,
        patience=50,  # epochs to wait for no observable improvement for early stopping of training
        device=args.device,
    )


if __name__ == '__main__':
    parser = argparse.ArgumentParser("YOLO v8 train parser")

    parser.add_argument('--model', type=str, default='yolov8s.yaml', help='yaml or pt file')
    parser.add_argument('--model_weight', type=str, default='yolov8s.pt', help='weight file to start from')
    parser.add_argument('--data_cfg', type=str, default='yolov8_utils/data_cfgs/visdrone.yaml', help='dataset config yaml')
    parser.add_argument('--epochs', type=int, default=30, help='number of training epochs')
    parser.add_argument('--batch_size', type=int, default=8, help='batch size')
    parser.add_argument('--img_sz', type=int, default=1280, help='train image size')
    parser.add_argument('--device', type=str, default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    args = parser.parse_args()

    main(args)
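The script is a thin wrapper over `model.train`; an equivalent direct call with the script's own defaults (paths are the defaults above, not new values):

from ultralytics import YOLO

model = YOLO(model='yolov8s.pt')
model.train(data='yolov8_utils/data_cfgs/visdrone.yaml',
            epochs=30, batch=8, imgsz=1280, patience=50, device='0')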
155
test/yolov7-tracker/tracker/yolox_utils/mot_dataset.py
Normal file
@@ -0,0 +1,155 @@
import os

import cv2
import numpy as np
from pycocotools.coco import COCO

from yolox.data.datasets import Dataset


class MOTDataset(Dataset):
    """
    COCO dataset class.
    """

    def __init__(
        self,
        data_dir=None,
        json_file="train_half.json",
        name="train",
        img_size=(608, 1088),
        preproc=None,
    ):
        """
        COCO dataset initialization. Annotation data are read into memory by COCO API.
        Args:
            data_dir (str): dataset root directory
            json_file (str): COCO json file name
            name (str): COCO data name (e.g. 'train2017' or 'val2017')
            img_size (int): target image size after pre-processing
            preproc: data augmentation strategy
        """
        super().__init__(img_size)

        self.data_dir = data_dir
        self.json_file = json_file

        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
        self.ids = self.coco.getImgIds()
        self.class_ids = sorted(self.coco.getCatIds())
        cats = self.coco.loadCats(self.coco.getCatIds())
        self._classes = tuple([c["name"] for c in cats])
        self.annotations = self._load_coco_annotations()
        self.name = name
        self.img_size = img_size
        self.preproc = preproc

    def __len__(self):
        return len(self.ids)

    def _load_coco_annotations(self):
        return [self.load_anno_from_ids(_ids) for _ids in self.ids]

    def load_anno_from_ids(self, id_):
        im_ann = self.coco.loadImgs(id_)[0]
        width = im_ann["width"]
        height = im_ann["height"]
        frame_id = im_ann["frame_id"]
        video_id = im_ann["video_id"]
        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)
        objs = []
        for obj in annotations:
            x1 = obj["bbox"][0]
            y1 = obj["bbox"][1]
            x2 = x1 + obj["bbox"][2]
            y2 = y1 + obj["bbox"][3]
            if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2, y2]
                objs.append(obj)

        num_objs = len(objs)

        res = np.zeros((num_objs, 6))

        for ix, obj in enumerate(objs):
            cls = self.class_ids.index(obj["category_id"])
            res[ix, 0:4] = obj["clean_bbox"]
            res[ix, 4] = cls
            res[ix, 5] = obj["track_id"]

        file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
        img_info = (height, width, frame_id, video_id, file_name)

        del im_ann, annotations

        return (res, img_info, file_name)

    def load_anno(self, index):
        return self.annotations[index][0]

    def pull_item(self, index):
        id_ = self.ids[index]

        res, img_info, file_name = self.annotations[index]
        # load image and preprocess
        img_file = os.path.join(
            self.data_dir, 'images', self.name, file_name
        )
        # for debug
        # print(f"************{img_file}************")
        # exit()
        img = cv2.imread(img_file)
        assert img is not None

        return img, res.copy(), img_info, np.array([id_])

    @Dataset.resize_getitem
    def __getitem__(self, index):
        """
        One image / label pair for the given index is picked up and pre-processed.

        Args:
            index (int): data index

        Returns:
            img (numpy.ndarray): pre-processed image
            padded_labels (torch.Tensor): pre-processed label data.
                The shape is :math:`[max_labels, 5]`.
                each label consists of [class, xc, yc, w, h]:
                    class (float): class index.
                    xc, yc (float) : center of bbox whose values range from 0 to 1.
                    w, h (float) : size of bbox whose values range from 0 to 1.
            info_img : tuple of h, w, nh, nw, dx, dy.
                h, w (int): original shape of the image
                nh, nw (int): shape of the resized image without padding
                dx, dy (int): pad size
            img_id (int): same as the input index. Used for evaluation.
        """
        img, target, img_info, img_id = self.pull_item(index)

        if self.preproc is not None:
            img, target = self.preproc(img, target, self.input_dim)
        return img, target, img_info, img_id


class VisDroneDataset(MOTDataset):
    def __init__(self, data_dir=None, json_file="train_half.json", name="train", img_size=(608, 1088), preproc=None):
        super().__init__(data_dir, json_file, name, img_size, preproc)
        self.DATA_ROOT = '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
        self.VisD_dict = {'train': 'VisDrone2019-MOT-train',
                          'test': 'VisDrone2019-MOT-test-dev'}

    def pull_item(self, index):
        id_ = self.ids[index]

        res, img_info, file_name = self.annotations[index]
        # load image and preprocess
        # img_file = os.path.join(
        #     self.data_dir, self.name, file_name
        # )
        img_file = os.path.join(
            self.DATA_ROOT, self.VisD_dict[self.name], 'sequences', file_name
        )
        img = cv2.imread(img_file)
        assert img is not None

        return img, res.copy(), img_info, np.array([id_])
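A minimal instantiation sketch; the dataset root and json name mirror the defaults used elsewhere in this repo, and the json is assumed to carry the MOT-specific fields (frame_id, video_id, track_id) the class reads:

dataset = MOTDataset(
    data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
    json_file='train.json',
    name='train',
    img_size=(608, 1088),
)
img, target, img_info, img_id = dataset.pull_item(0)
# target rows: x1, y1, x2, y2, class index, track id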
29
test/yolov7-tracker/tracker/yolox_utils/postprocess.py
Normal file
@@ -0,0 +1,29 @@
from yolox.utils import postprocess


def postprocess_yolox(out, num_classes, conf_thresh, img, ori_img):
    """
    convert out to -> (tlbr, conf, cls)
    """
    out = postprocess(out, num_classes, conf_thresh)[0]  # (tlbr, obj_conf, cls_conf, cls)

    if out is None:
        return out

    # merge conf
    out[:, 4] *= out[:, 5]
    out[:, 5] = out[:, -1]
    out = out[:, :-1]

    # scale to origin size
    img_size = [img.shape[-2], img.shape[-1]]  # h, w
    ori_img_size = [ori_img.shape[0], ori_img.shape[1]]  # h0, w0
    img_h, img_w = img_size[0], img_size[1]

    scale = min(float(img_h) / ori_img_size[0], float(img_w) / ori_img_size[1])

    out[:, :4] /= scale

    return out
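A hedged call-site sketch, assuming `tensor_img` is the letterboxed network input of shape (1, 3, H, W) and `ori_img` the original BGR frame; the class count and threshold are illustrative:

out = model(tensor_img)
dets = postprocess_yolox(out, num_classes=1, conf_thresh=0.1,
                         img=tensor_img, ori_img=ori_img)
if dets is not None:
    # rows: x1, y1, x2, y2, conf, cls, in original-image coordinates
    pass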
122
test/yolov7-tracker/tracker/yolox_utils/train_yolox.py
Normal file
@@ -0,0 +1,122 @@
import argparse
import random
import warnings

from loguru import logger

import torch
import torch.backends.cudnn as cudnn

from yolox.core import Trainer, launch
from yolox.exp import get_exp


def make_parser():
    parser = argparse.ArgumentParser("YOLOX train parser")
    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
    parser.add_argument("-n", "--name", type=str, default=None, help="model name")

    # distributed
    parser.add_argument(
        "--dist-backend", default="nccl", type=str, help="distributed backend"
    )
    parser.add_argument(
        "--dist-url",
        default=None,
        type=str,
        help="url used to set up distributed training",
    )
    parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
    parser.add_argument(
        "-d", "--devices", default=None, type=int, help="device for training"
    )
    parser.add_argument(
        "--local_rank", default=0, type=int, help="local rank for dist training"
    )
    parser.add_argument(
        "-f",
        "--exp_file",
        default=None,
        type=str,
        help="please input your experiment description file",
    )
    parser.add_argument(
        "--resume", default=False, action="store_true", help="resume training"
    )
    parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
    parser.add_argument(
        "-e",
        "--start_epoch",
        default=None,
        type=int,
        help="resume training start epoch",
    )
    parser.add_argument(
        "--num_machines", default=1, type=int, help="num of node for training"
    )
    parser.add_argument(
        "--machine_rank", default=0, type=int, help="node rank for multi-node training"
    )
    parser.add_argument(
        "--fp16",
        dest="fp16",
        default=True,
        action="store_true",
        help="Adopting mix precision training.",
    )
    parser.add_argument(
        "-o",
        "--occupy",
        dest="occupy",
        default=False,
        action="store_true",
        help="occupy GPU memory first for training.",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    return parser


@logger.catch
def main(exp, args):
    if exp.seed is not None:
        random.seed(exp.seed)
        torch.manual_seed(exp.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed training. This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! You may see unexpected behavior "
            "when restarting from checkpoints."
        )

    # set environment variables for distributed training
    cudnn.benchmark = True

    trainer = Trainer(exp, args)
    trainer.train()


if __name__ == "__main__":
    args = make_parser().parse_args()
    exp = get_exp(args.exp_file, args.name)
    exp.merge(args.opts)

    if not args.experiment_name:
        args.experiment_name = exp.exp_name

    num_gpu = torch.cuda.device_count() if args.devices is None else args.devices
    assert num_gpu <= torch.cuda.device_count()

    launch(
        main,
        num_gpu,
        args.num_machines,
        args.machine_rank,
        backend=args.dist_backend,
        dist_url=args.dist_url,
        args=(exp, args),
    )
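A hypothetical programmatic equivalent of the CLI entry point for a single GPU, skipping the distributed launch(); the exp file path and batch settings are illustrative:

from yolox.exp import get_exp

exp = get_exp('tracker/yolox_utils/yolox_m.py', None)
args = make_parser().parse_args(['-b', '8', '-d', '1'])
args.experiment_name = exp.exp_name
main(exp, args)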
144
test/yolov7-tracker/tracker/yolox_utils/yolox_m.py
Normal file
@@ -0,0 +1,144 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir


class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1  # 1 for uavdt, mot17
        self.depth = 0.67
        self.width = 0.75
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_ann = "train.json"
        self.val_ann = "test.json"
        self.input_size = (800, 1440)
        self.test_size = (800, 1440)
        self.random_size = (18, 32)
        self.max_epoch = 80
        self.print_interval = 20
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        self.warmup_epochs = 1

    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        from mot_dataset import MOTDataset

        dataset = MOTDataset(
            # data_dir=os.path.join(get_yolox_datadir(), "mot"),
            # data_dir='/data/wujiapeng/datasets/UAVDT',
            data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
            json_file=self.train_ann,
            name='train',
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=500,
            ),
        )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=1000,
            ),
            degrees=self.degrees,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            input_dimension=self.input_size,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
        from yolox.data import ValTransform
        from mot_dataset import MOTDataset

        valdataset = MOTDataset(
            # data_dir=os.path.join(get_yolox_datadir(), "mot"),
            # data_dir='/data/wujiapeng/datasets/UAVDT',
            data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
            json_file=self.val_ann,
            img_size=self.test_size,
            name='test',
            preproc=ValTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
            ),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator
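The yolox_x exp below is identical except for model scale and dataset root. For reference, the depth/width multipliers match the standard upstream YOLOX sizes (a fact about YOLOX defaults, not something specific to this repo):

# standard YOLOX scaling factors, for comparison with the two exp files here
YOLOX_SCALES = {
    'yolox_m': {'depth': 0.67, 'width': 0.75},
    'yolox_x': {'depth': 1.33, 'width': 1.25},
}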
142
test/yolov7-tracker/tracker/yolox_utils/yolox_x.py
Normal file
@@ -0,0 +1,142 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir


class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_ann = "train.json"
        self.val_ann = "test.json"
        self.input_size = (800, 1440)
        self.test_size = (800, 1440)
        self.random_size = (18, 32)
        self.max_epoch = 80
        self.print_interval = 20
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        self.warmup_epochs = 1

    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        from mot_dataset import MOTDataset

        dataset = MOTDataset(
            # data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir='/data/wujiapeng/datasets/UAVDT',
            json_file=self.train_ann,
            name='train',
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=500,
            ),
        )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=1000,
            ),
            degrees=self.degrees,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            input_dimension=self.input_size,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
        from yolox.data import ValTransform
        from mot_dataset import MOTDataset

        valdataset = MOTDataset(
            # data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir='/data/wujiapeng/datasets/UAVDT',
            json_file=self.val_ann,
            img_size=self.test_size,
            name='test',
            preproc=ValTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
            ),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator