init test

√(noham)²
2024-07-18 01:45:03 +02:00
parent 69dc05552e
commit 3e95611523
181 changed files with 34510 additions and 1 deletions

View File

@@ -0,0 +1,32 @@
# Config file of MOT17 dataset
DATASET_ROOT: '/data/wujiapeng/datasets/MOT17' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
- 'pedestrian'
CATEGORY_DICT:
0: 'pedestrian'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: '' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/MOT17/train'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'MOT17-02-SDP': null
'MOT17-04-SDP': null
'MOT17-05-SDP': null
'MOT17-09-SDP': null
'MOT17-10-SDP': null
'MOT17-11-SDP': null
'MOT17-13-SDP': null
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'

View File

@@ -0,0 +1,26 @@
# Config file of UAVDT dataset
DATASET_ROOT: '/data/wujiapeng/datasets/UAVDT' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
- 'car'
CATEGORY_DICT:
0: 'car'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/UAVDT.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/UAVDT/UAV-benchmark-M'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'M0101': 407
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'

View File

@@ -0,0 +1,61 @@
# Config file of VisDrone dataset
DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
- 'pedestrian'
- 'people'
- 'bicycle'
- 'car'
- 'van'
- 'truck'
- 'tricycle'
- 'awning-tricycle'
- 'bus'
- 'motor'
CATEGORY_DICT:
0: 'pedestrian'
1: 'people'
2: 'bicycle'
3: 'car'
4: 'van'
5: 'truck'
6: 'tricycle'
7: 'awning-tricycle'
8: 'bus'
9: 'motor'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/Visdrone_all.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'uav0000009_03358_v': 219
'uav0000073_00600_v': 328
'uav0000073_04464_v': 312
'uav0000077_00720_v': 780
'uav0000088_00290_v': 296
'uav0000119_02301_v': 179
'uav0000120_04775_v': 1000
'uav0000161_00000_v': 308
'uav0000188_00000_v': 260
'uav0000201_00000_v': 677
'uav0000249_00001_v': 360
'uav0000249_02688_v': 244
'uav0000297_00000_v': 146
'uav0000297_02761_v': 373
'uav0000306_00230_v': 420
'uav0000355_00001_v': 468
'uav0000370_00001_v': 265
'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'

View File

@@ -0,0 +1,51 @@
# Config file of VisDrone dataset
DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
- 'pedestrian'
- 'car'
- 'van'
- 'truck'
- 'bus'
CATEGORY_DICT:
0: 'pedestrian'
1: 'car'
2: 'van'
3: 'truck'
4: 'bus'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/Visdrone_all.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'uav0000009_03358_v': 219
'uav0000073_00600_v': 328
'uav0000073_04464_v': 312
'uav0000077_00720_v': 780
'uav0000088_00290_v': 296
'uav0000119_02301_v': 179
'uav0000120_04775_v': 1000
'uav0000161_00000_v': 308
'uav0000188_00000_v': 260
'uav0000201_00000_v': 677
'uav0000249_00001_v': 360
'uav0000249_02688_v': 244
'uav0000297_00000_v': 146
'uav0000297_02761_v': 373
'uav0000306_00230_v': 420
'uav0000355_00001_v': 468
'uav0000370_00001_v': 265
'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'

View File

@@ -0,0 +1,37 @@
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
def tic(self):
# use time.time instead of time.clock because time.clock
# does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
self.duration = self.average_time
else:
self.duration = self.diff
return self.duration
def clear(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
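A minimal usage sketch of the Timer class above (the my_timer module name is taken from the import in the tracking script below; values are illustrative):

import time
from my_timer import Timer

timer = Timer()
for _ in range(5):
    timer.tic()
    time.sleep(0.01)               # stand-in for per-frame detection + tracking
    timer.toc()                    # average=True, so duration holds the running mean
print(f'{timer.calls} calls, average {timer.average_time:.4f}s each')
timer.clear()                      # reset before timing the next sequence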

View File

@@ -0,0 +1,305 @@
"""
Main script for tracking on a dataset split.
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml
from loguru import logger
import argparse
from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video
from my_timer import Timer
from tracker_dataloader import TestDataset
# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker
from trackers.strongsort_tracker import StrongSortTracker
from trackers.sparse_tracker import SparseTracker
# YOLOX modules
try:
from yolox.exp import get_exp
from yolox_utils.postprocess import postprocess_yolox
from yolox.utils import fuse_model
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
pass
# YOLOv7 modules
try:
sys.path.append(os.getcwd())
from models.experimental import attempt_load
from utils.torch_utils import select_device, time_synchronized, TracedModel
from utils.general import non_max_suppression, scale_coords, check_img_size
from yolov7_utils.postprocess import postprocess as postprocess_yolov7
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
pass
# YOLOv8 modules
try:
from ultralytics import YOLO
from yolov8_utils.postprocess import postprocess as postprocess_yolov8
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
pass
TRACKER_DICT = {
'sort': SortTracker,
'bytetrack': ByteTracker,
'botsort': BotTracker,
'c_bioutrack': C_BIoUTracker,
'ocsort': OCSortTracker,
'deepsort': DeepSortTracker,
'strongsort': StrongSortTracker,
'sparsetrack': SparseTracker
}
def get_args():
parser = argparse.ArgumentParser()
"""general"""
parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.')
parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')
parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')
parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter detections')
parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS')
parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks')
parser.add_argument('--device', type=str, default='6', help='cuda device, e.g. 0 or 0,1,2,3 or cpu')
"""yolox"""
parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')
"""model path"""
parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLO v7')
# other model path
parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path to the ReID model weights')
parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path to the DHN weights for DeepMOT')
"""other options"""
parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only affects trackers such as BoT-SORT that use a ReID branch')
parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance dist')
parser.add_argument('--min_area', type=float, default=150, help='used to filter out small bboxes')
parser.add_argument('--save_dir', type=str, default='track_results/{dataset_name}/{split}')
parser.add_argument('--save_images', action='store_true', help='save tracking results (image)')
parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')
parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate')
return parser.parse_args()
def main(args, dataset_cfgs):
"""1. set some params"""
# NOTE: saving videos requires saving images first
if args.save_videos:
args.save_images = True
"""2. load detector"""
device = select_device(args.device)
if args.detector == 'yolox':
exp = get_exp(args.yolox_exp_file, None) # TODO: modify num_classes etc. for specific dataset
model_img_size = exp.input_size
model = exp.get_model()
model.to(device)
model.eval()
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
ckpt = torch.load(args.detector_model_path, map_location=device)
model.load_state_dict(ckpt['model'])
logger.info("loaded checkpoint done")
model = fuse_model(model)
stride = None # match with yolo v7
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov7':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = attempt_load(args.detector_model_path, map_location=device)
# get inference img size
stride = int(model.stride.max()) # model stride
model_img_size = check_img_size(args.img_size, s=stride) # check img_size
# Traced model
model = TracedModel(model, device=device, img_size=args.img_size)
# model.half()
logger.info("loaded checkpoint done")
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov8':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = YOLO(args.detector_model_path)
model_img_size = [None, None]
stride = None
logger.info("loaded checkpoint done")
else:
logger.error(f"detector {args.detector} is not supprted")
exit(0)
"""3. load sequences"""
DATA_ROOT = dataset_cfgs['DATASET_ROOT']
SPLIT = dataset_cfgs['SPLIT']
seqs = sorted(os.listdir(os.path.join(DATA_ROOT, 'images', SPLIT)))
seqs = [seq for seq in seqs if seq not in dataset_cfgs['IGNORE_SEQS']]
if None not in dataset_cfgs['CERTAIN_SEQS']:
seqs = dataset_cfgs['CERTAIN_SEQS']
logger.info(f'Total {len(seqs)} seqs will be tracked: {seqs}')
save_dir = args.save_dir.format(dataset_name=args.dataset, split=SPLIT)
"""4. Tracking"""
# set timer
timer = Timer()
seq_fps = []
for seq in seqs:
logger.info(f'--------------tracking seq {seq}--------------')
dataset = TestDataset(DATA_ROOT, SPLIT, seq_name=seq, img_size=model_img_size, model=args.detector, stride=stride)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
tracker = TRACKER_DICT[args.tracker](args, )
process_bar = enumerate(data_loader)
process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)
results = []
for frame_idx, (ori_img, img) in process_bar:
# start timing this frame
timer.tic()
if args.detector == 'yolov8':
img = img.squeeze(0).cpu().numpy()
else:
img = img.to(device) # (1, C, H, W)
img = img.float()
ori_img = ori_img.squeeze(0)
# get detector output
with torch.no_grad():
if args.detector == 'yolov8':
output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
else:
output = model(img)
# postprocess output to original scales
if args.detector == 'yolox':
output = postprocess_yolox(output, len(dataset_cfgs['CATEGORY_NAMES']), conf_thresh=args.conf_thresh,
img=img, ori_img=ori_img)
elif args.detector == 'yolov7':
output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)
elif args.detector == 'yolov8':
output = postprocess_yolov8(output)
else: raise NotImplementedError
# output: (tlbr, conf, cls)
# convert tlbr to tlwh
if isinstance(output, torch.Tensor):
output = output.detach().cpu().numpy()
output[:, 2] -= output[:, 0]
output[:, 3] -= output[:, 1]
current_tracks = tracker.update(output, img, ori_img.cpu().numpy())
# save results
cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
for trk in current_tracks:
bbox = trk.tlwh
id = trk.track_id
cls = trk.category
score = trk.score
# filter low area bbox
if bbox[2] * bbox[3] > args.min_area:
cur_tlwh.append(bbox)
cur_id.append(id)
cur_cls.append(cls)
cur_score.append(score)
# results.append((frame_id + 1, id, bbox, cls))
results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))
timer.toc()
if args.save_images:
plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
save_dir=os.path.join(save_dir, 'vis_results'))
save_results(folder_name=os.path.join(args.dataset, SPLIT),
seq_name=seq,
results=results)
# show the fps
seq_fps.append(frame_idx / timer.total_time)
logger.info(f'fps of seq {seq}: {seq_fps[-1]}')
timer.clear()
if args.save_videos:
save_video(images_path=os.path.join(save_dir, 'vis_results'))
logger.info(f'save video of {seq} done')
# show the average fps
logger.info(f'average fps: {np.mean(seq_fps)}')
if __name__ == '__main__':
args = get_args()
with open(f'./tracker/config_files/{args.dataset}.yaml', 'r') as f:
cfgs = yaml.load(f, Loader=yaml.FullLoader)
main(args, cfgs)
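A condensed sketch of the wiring in main() above, assuming the repo modules and config files are importable; the dataset name, the dummy detections, and the SimpleNamespace stand-in for get_args() are illustrative assumptions:

from types import SimpleNamespace
import numpy as np
import yaml
from trackers.byte_tracker import ByteTracker

with open('./tracker/config_files/mot17.yaml', 'r') as f:   # dataset name assumed
    cfgs = yaml.safe_load(f)
print(cfgs['DATASET_ROOT'], cfgs['SPLIT'], list(cfgs['TRACK_EVAL']['SEQ_INFO']))

args = SimpleNamespace(conf_thresh=0.2, track_buffer=30, kalman_format='default')
tracker = ByteTracker(args)

dets = np.array([[100., 150., 40., 80., 0.90, 0.],          # (tlwh, conf, cls) per row
                 [400., 120., 35., 70., 0.15, 0.]])
online = tracker.update(dets, img=None, ori_img=None)       # ByteTracker ignores the image args
for trk in online:
    print(trk.track_id, trk.tlwh, trk.category, trk.score)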

View File

@@ -0,0 +1,266 @@
"""
main code for track
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml
from loguru import logger
import argparse
from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video
from tracker_dataloader import TestDataset, DemoDataset
# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker
# YOLOX modules
try:
from yolox.exp import get_exp
from yolox_utils.postprocess import postprocess_yolox
from yolox.utils import fuse_model
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
pass
# YOLOv7 modules
try:
sys.path.append(os.getcwd())
from models.experimental import attempt_load
from utils.torch_utils import select_device, time_synchronized, TracedModel
from utils.general import non_max_suppression, scale_coords, check_img_size
from yolov7_utils.postprocess import postprocess as postprocess_yolov7
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
pass
# YOLOv8 modules
try:
from ultralytics import YOLO
from yolov8_utils.postprocess import postprocess as postprocess_yolov8
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
pass
TRACKER_DICT = {
'sort': SortTracker,
'bytetrack': ByteTracker,
'botsort': BotTracker,
'c_bioutrack': C_BIoUTracker,
'ocsort': OCSortTracker,
'deepsort': DeepSortTracker
}
def get_args():
parser = argparse.ArgumentParser()
"""general"""
parser.add_argument('--obj', type=str, required=True, default='demo.mp4', help='video or images folder PATH')
parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')
parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')
parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter detections')
parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS')
parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks')
parser.add_argument('--device', type=str, default='6', help='cuda device, e.g. 0 or 0,1,2,3 or cpu')
"""yolox"""
parser.add_argument('--num_classes', type=int, default=1)
parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')
"""model path"""
parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLO v7')
# other model path
parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path to the ReID model weights')
parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path to the DHN weights for DeepMOT')
"""other options"""
parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only affects trackers such as BoT-SORT that use a ReID branch')
parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance dist')
parser.add_argument('--min_area', type=float, default=150, help='used to filter out small bboxes')
parser.add_argument('--save_dir', type=str, default='track_demo_results')
parser.add_argument('--save_images', action='store_true', help='save tracking results (image)')
parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')
parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate')
return parser.parse_args()
def main(args):
"""1. set some params"""
# NOTE: saving videos requires saving images first
if args.save_videos:
args.save_images = True
"""2. load detector"""
device = select_device(args.device)
if args.detector == 'yolox':
exp = get_exp(args.yolox_exp_file, None) # TODO: modify num_classes etc. for specific dataset
model_img_size = exp.input_size
model = exp.get_model()
model.to(device)
model.eval()
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
ckpt = torch.load(args.detector_model_path, map_location=device)
model.load_state_dict(ckpt['model'])
logger.info("loaded checkpoint done")
model = fuse_model(model)
stride = None # match with yolo v7
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov7':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = attempt_load(args.detector_model_path, map_location=device)
# get inference img size
stride = int(model.stride.max()) # model stride
model_img_size = check_img_size(args.img_size, s=stride) # check img_size
# Traced model
model = TracedModel(model, device=device, img_size=args.img_size)
# model.half()
logger.info("loaded checkpoint done")
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov8':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = YOLO(args.detector_model_path)
model_img_size = [None, None]
stride = None
logger.info("loaded checkpoint done")
else:
logger.error(f"detector {args.detector} is not supprted")
exit(0)
"""3. load sequences"""
dataset = DemoDataset(file_name=args.obj, img_size=model_img_size, model=args.detector, stride=stride, )
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
tracker = TRACKER_DICT[args.tracker](args, )
save_dir = args.save_dir
process_bar = enumerate(data_loader)
process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)
results = []
"""4. Tracking"""
for frame_idx, (ori_img, img) in process_bar:
if args.detector == 'yolov8':
img = img.squeeze(0).cpu().numpy()
else:
img = img.to(device) # (1, C, H, W)
img = img.float()
ori_img = ori_img.squeeze(0)
# get detector output
with torch.no_grad():
if args.detector == 'yolov8':
output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
else:
output = model(img)
# postprocess output to original scales
if args.detector == 'yolox':
output = postprocess_yolox(output, args.num_classes, conf_thresh=args.conf_thresh,
img=img, ori_img=ori_img)
elif args.detector == 'yolov7':
output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)
elif args.detector == 'yolov8':
output = postprocess_yolov8(output)
else: raise NotImplementedError
# output: (tlbr, conf, cls)
# convert tlbr to tlwh
if isinstance(output, torch.Tensor):
output = output.detach().cpu().numpy()
output[:, 2] -= output[:, 0]
output[:, 3] -= output[:, 1]
current_tracks = tracker.update(output, img, ori_img.cpu().numpy())
# save results
cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
for trk in current_tracks:
bbox = trk.tlwh
id = trk.track_id
cls = trk.category
score = trk.score
# filter low area bbox
if bbox[2] * bbox[3] > args.min_area:
cur_tlwh.append(bbox)
cur_id.append(id)
cur_cls.append(cls)
cur_score.append(score)
# results.append((frame_id + 1, id, bbox, cls))
results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))
if args.save_images:
plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
save_dir=os.path.join(save_dir, 'vis_results'))
save_results(folder_name=os.path.join(save_dir, 'txt_results'),
seq_name='demo',
results=results)
if args.save_videos:
save_video(images_path=os.path.join(save_dir, 'vis_results'))
logger.info(f'save video done')
if __name__ == '__main__':
args = get_args()
main(args)
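Both scripts convert the detector output from tlbr (x1, y1, x2, y2) to tlwh (x, y, w, h) in place before calling tracker.update; a worked check of that two-line conversion:

import numpy as np

out = np.array([[100., 150., 140., 230., 0.9, 0.]])   # x1, y1, x2, y2, conf, cls
out[:, 2] -= out[:, 0]                                 # w = x2 - x1 -> 40
out[:, 3] -= out[:, 1]                                 # h = y2 - y1 -> 80
print(out[0, :4])                                      # [100. 150.  40.  80.]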

View File

@@ -0,0 +1,223 @@
import numpy as np
import torch
import cv2
import os
import os.path as osp
from torch.utils.data import Dataset
class TestDataset(Dataset):
""" This class generate origin image, preprocessed image for inference
NOTE: for every sequence, initialize a TestDataset class
"""
def __init__(self, data_root, split, seq_name, img_size=[640, 640], legacy_yolox=True, model='yolox', **kwargs) -> None:
"""
Args:
data_root: path for entire dataset
seq_name: name of sequence
img_size: List[int, int] | Tuple[int, int] image size for detection model
legacy_yolox: bool, to be compatible with older versions of yolox
model: detection model; currently supports yolox, yolov7 and yolov8
"""
super().__init__()
self.model = model
self.data_root = data_root
self.seq_name = seq_name
self.img_size = img_size
self.split = split
self.seq_path = osp.join(self.data_root, 'images', self.split, self.seq_name)
self.imgs_in_seq = sorted(os.listdir(self.seq_path))
self.legacy = legacy_yolox
self.other_param = kwargs
def __getitem__(self, idx):
if self.model == 'yolox':
return self._getitem_yolox(idx)
elif self.model == 'yolov7':
return self._getitem_yolov7(idx)
elif self.model == 'yolov8':
return self._getitem_yolov8(idx)
def _getitem_yolox(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))
img_resized, _ = self._preprocess_yolox(img, self.img_size, )
if self.legacy:
img_resized = img_resized[::-1, :, :].copy() # BGR -> RGB
img_resized /= 255.0
img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
return torch.from_numpy(img), torch.from_numpy(img_resized)
def _getitem_yolov7(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))
img_resized = self._preprocess_yolov7(img, ) # torch.Tensor
return torch.from_numpy(img), img_resized
def _getitem_yolov8(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx])) # (h, w, c)
# img = self._preprocess_yolov8(img)
return torch.from_numpy(img), torch.from_numpy(img)
def _preprocess_yolox(self, img, size, swap=(2, 0, 1)):
""" convert origin image to resized image, YOLOX-manner
Args:
img: np.ndarray
size: List[int, int] | Tuple[int, int]
swap: (H, W, C) -> (C, H, W)
Returns:
np.ndarray, float
"""
if len(img.shape) == 3:
padded_img = np.ones((size[0], size[1], 3), dtype=np.uint8) * 114
else:
padded_img = np.ones(size, dtype=np.uint8) * 114
r = min(size[0] / img.shape[0], size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
padded_img = padded_img.transpose(swap)
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
return padded_img, r
def _preprocess_yolov7(self, img, ):
img_resized = self._letterbox(img, new_shape=self.img_size, stride=self.other_param['stride'], )[0]
img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img_resized = np.ascontiguousarray(img_resized)
img_resized = torch.from_numpy(img_resized).float()
img_resized /= 255.0
return img_resized
def _preprocess_yolov8(self, img, ):
img = img.transpose((2, 0, 1))
img = np.ascontiguousarray(img)
return img
def _letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
def __len__(self, ):
return len(self.imgs_in_seq)
class DemoDataset(TestDataset):
"""
dataset for demo: runs on a single video file or an image folder
"""
def __init__(self, file_name, img_size=[640, 640], model='yolox', legacy_yolox=True, **kwargs) -> None:
self.file_name = file_name
self.model = model
self.img_size = img_size
self.is_video = '.mp4' in file_name or '.avi' in file_name
if not self.is_video:
self.imgs_in_seq = sorted(os.listdir(file_name))
else:
self.imgs_in_seq = []
self.cap = cv2.VideoCapture(file_name)
while True:
ret, frame = self.cap.read()
if not ret: break
self.imgs_in_seq.append(frame)
self.legacy = legacy_yolox
def __getitem__(self, idx):
if not self.is_video:
img = cv2.imread(osp.join(self.file_name, self.imgs_in_seq[idx]))
else:
img = self.imgs_in_seq[idx]
if self.model == 'yolox':
return self._getitem_yolox(img)
elif self.model == 'yolov7':
return self._getitem_yolov7(img)
elif self.model == 'yolov8':
return self._getitem_yolov8(img)
def _getitem_yolox(self, img):
img_resized, _ = self._preprocess_yolox(img, self.img_size, )
if self.legacy:
img_resized = img_resized[::-1, :, :].copy() # BGR -> RGB
img_resized /= 255.0
img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
return torch.from_numpy(img), torch.from_numpy(img_resized)
def _getitem_yolov7(self, img):
img_resized = self._preprocess_yolov7(img, ) # torch.Tensor
return torch.from_numpy(img), img_resized
def _getitem_yolov8(self, img):
# img = self._preprocess_yolov8(img)
return torch.from_numpy(img), torch.from_numpy(img)
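A numeric trace of _letterbox above with its defaults (new_shape=640, auto=True, stride=32) on a 1080x1920 frame; the arithmetic mirrors the function line by line:

import numpy as np

shape = (1080, 1920)                                        # (height, width) of the source frame
new_shape = (640, 640)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])   # 1/3
new_unpad = (round(shape[1] * r), round(shape[0] * r))      # (640, 360)
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]   # 0, 280
dw, dh = np.mod(dw, 32), np.mod(dh, 32)                     # 0, 24 with the minimum-rectangle option
print(new_unpad, dw / 2, dh / 2)                            # (640, 360) 0.0 12.0 -> padded to 384 x 640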

View File

@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id():
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@property
def xywh(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2.0
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
@staticmethod
def tlwh_to_xysa(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] = tlwh[2] * tlwh[3]
ret[3] = tlwh[2] / tlwh[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
def to_xywh(self):
return self.tlwh_to_xywh(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
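A numeric check of the static conversions above for tlwh = (10, 20, 30, 60); the import path is an assumption based on the package layout:

import numpy as np
from trackers.basetrack import BaseTrack

tlwh = np.array([10., 20., 30., 60.])
print(BaseTrack.tlwh_to_tlbr(tlwh))   # [10. 20. 40. 80.]   bottom-right corner
print(BaseTrack.tlwh_to_xywh(tlwh))   # [25. 50. 30. 60.]   center plus size
print(BaseTrack.tlwh_to_xyah(tlwh))   # [25. 50.  0.5 60.]  aspect ratio = w / h
print(BaseTrack.tlwh_to_xysa(tlwh))   # [25. 50. 1800. 0.5] area and w / h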

View File

@@ -0,0 +1,329 @@
"""
BoT-SORT tracker
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class BotTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (c, 128, 256)
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, 128, 128)
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128)) # shape: (128, 128, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the detected objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# if any(tlbr_ == -1 for tlbr_ in tlwh):
# print(tlwh)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
"""Step 1: Extract reid features"""
if self.with_reid:
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
if self.with_reid:
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
ious_dists = iou_distance(tracklet_pool, detections)
ious_dists_mask = (ious_dists > 0.5) # high conf iou
if self.with_reid:
# mixed cost matrix
emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
ious_dists = iou_distance(unconfirmed, detections)
ious_dists_mask = (ious_dists > 0.5)
if self.with_reid:
emb_dists = embedding_distance(unconfirmed, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
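Every tracker in this commit partitions detections into a high score set (first association) and a low score set (second association) with the masks used in update() above; a numeric check with conf_thresh = 0.5:

import numpy as np

scores = np.array([0.92, 0.47, 0.30, 0.08])
conf_thresh = 0.5
remain_inds = scores > conf_thresh                                  # first association
inds_second = np.logical_and(scores > 0.1, scores < conf_thresh)    # second association
print(scores[remain_inds], scores[inds_second])                     # [0.92] [0.47 0.3 ]
# detections below 0.1 (here 0.08) are dropped entirely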

View File

@@ -0,0 +1,201 @@
"""
ByteTrack
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class ByteTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
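A small check of the tracklet set helpers defined above; the import path is an assumption based on the package layout, and SimpleNamespace objects stand in for real tracklets:

from types import SimpleNamespace
from trackers.byte_tracker import joint_tracklets, sub_tracklets

a, b, c = (SimpleNamespace(track_id=i) for i in (1, 2, 3))
print([t.track_id for t in joint_tracklets([a, b], [b, c])])   # [1, 2, 3]  union keyed by track_id
print([t.track_id for t in sub_tracklets([a, b, c], [b])])     # [1, 3]     drop ids present in the second list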

View File

@@ -0,0 +1,204 @@
"""
C_BIoU Track
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *
class C_BIoUTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = buffered_iou_distance(tracklet_pool, detections, level=1)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = buffered_iou_distance(unconfirmed, detections, level=1)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt
"""GMC Module"""
class GMC:
def __init__(self, method='orb', downscale=2, verbose=None):
super(GMC, self).__init__()
self.method = method
self.downscale = max(1, int(downscale))
if self.method == 'orb':
self.detector = cv2.FastFeatureDetector_create(20)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
elif self.method == 'sift':
self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.matcher = cv2.BFMatcher(cv2.NORM_L2)
elif self.method == 'ecc':
number_of_iterations = 100
termination_eps = 1e-5
self.warp_mode = cv2.MOTION_EUCLIDEAN
self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
elif self.method == 'file' or self.method == 'files':
seqName = verbose[0]
ablation = verbose[1]
if ablation:
filePath = r'tracker/GMC_files/MOT17_ablation'
else:
filePath = r'tracker/GMC_files/MOTChallenge'
if '-FRCNN' in seqName:
seqName = seqName[:-6]
elif '-DPM' in seqName:
seqName = seqName[:-4]
elif '-SDP' in seqName:
seqName = seqName[:-4]
self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')
if self.gmcFile is None:
raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
elif self.method == 'none' or self.method == 'None':
self.method = 'none'
else:
raise ValueError("Error: Unknown CMC method:" + method)
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
if self.method == 'orb' or self.method == 'sift':
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
return self.applyEcc(raw_frame, detections)
elif self.method == 'file':
return self.applyFile(raw_frame, detections)
elif self.method == 'none':
return np.eye(2, 3)
else:
return np.eye(2, 3)
def applyEcc(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3, dtype=np.float32)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
# Initialization done
self.initializedFirstFrame = True
return H
# Run the ECC algorithm. The results are stored in warp_matrix.
# (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
try:
(cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
except:
print('Warning: findTransformECC failed; using the identity warp')
return H
def applyFeatures(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# find the keypoints
mask = np.zeros_like(frame)
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
if detections is not None:
for det in detections:
tlbr = (det[:4] / self.downscale).astype(np.int_)
mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
keypoints = self.detector.detect(frame, mask)
# compute the descriptors
keypoints, descriptors = self.extractor.compute(frame, keypoints)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
# Initialization done
self.initializedFirstFrame = True
return H
# Match descriptors.
knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
# Filter matches based on spatial distance
matches = []
spatialDistances = []
maxSpatialDistance = 0.25 * np.array([width, height])
# Handle empty matches case
if len(knnMatches) == 0:
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
for m, n in knnMatches:
if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
(np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
spatialDistances.append(spatialDistance)
matches.append(m)
meanSpatialDistances = np.mean(spatialDistances, 0)
stdSpatialDistances = np.std(spatialDistances, 0)
inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Draw the keypoint matches on the output image (disabled debug visualization; set to 1 to enable)
if 0:
matches_img = np.hstack((self.prevFrame, frame))
matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
W = np.size(self.prevFrame, 1)
for m in goodMatches:
prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
curr_pt[0] += W
color = np.random.randint(0, 255, (3,))
color = (int(color[0]), int(color[1]), int(color[2]))
matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
plt.figure()
plt.imshow(matches_img)
plt.show()
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
print('Warning: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
def applyFile(self, raw_frame, detections=None):
line = self.gmcFile.readline()
tokens = line.split("\t")
H = np.eye(2, 3, dtype=np.float_)
H[0, 0] = float(tokens[1])
H[0, 1] = float(tokens[2])
H[0, 2] = float(tokens[3])
H[1, 0] = float(tokens[4])
H[1, 1] = float(tokens[5])
H[1, 2] = float(tokens[6])
return H
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""
GMC module prediction
:param stracks: List[Strack]
"""
if len(stracks) > 0:
multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].kalman_filter.kf.x = mean
stracks[i].kalman_filter.kf.P = cov
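# --- Usage sketch (illustrative, not part of the original file) ---
# Assuming the enclosing class is named GMC and `video_frames` / `tracker` are
# placeholders from the surrounding tracking loop, camera motion compensation
# is typically applied once per frame and then propagated to all Kalman states:
#
# gmc = GMC(method='orb', downscale=2)
# for frame in video_frames:
# H = gmc.apply(frame) # 2x3 affine warp from previous to current frame
# GMC.multi_gmc(tracker.tracked_stracks, H) # warp track means/covariances in place
# GMC.multi_gmc(tracker.lost_stracks, H)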

View File

@@ -0,0 +1,327 @@
"""
Deep Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
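# --- Usage sketch (illustrative) ---
# Loading an OSNet appearance model; the checkpoint path below is a placeholder
# and must point to an existing weight file:
#
# reid = load_reid_model('osnet_x0_25', '/path/to/osnet_x0_25.pth')
# feats = reid(img_batch) # (N, feature_dim) embeddings, model runs on CUDA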
class DeepSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # -> (c, h, w), values in [0, 1]
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, h, w), where (w, h) = self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
matching_thresh=0.9,
cascade_depth=30,
tracks=tracklet_pool,
detections=detections
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get cost matrix: first compute the appearance cost, then gate it by the Kalman state
tracks: List[STrack]
dets: List[STrack]
"""
appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by the Kalman state (Mahalanobis) distance, constrained by
the 0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as state vector or only use [xc, yc]
return:
updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy
class BaseKalman:
def __init__(self,
state_dim: int = 8,
observation_dim: int = 4,
F: np.ndarray = np.zeros((0, )),
P: np.ndarray = np.zeros((0, )),
Q: np.ndarray = np.zeros((0, )),
H: np.ndarray = np.zeros((0, )),
R: np.ndarray = np.zeros((0, )),
) -> None:
self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
if F.shape[0] > 0: self.kf.F = F # if valid
if P.shape[0] > 0: self.kf.P = P
if Q.shape[0] > 0: self.kf.Q = Q
if H.shape[0] > 0: self.kf.H = H
if R.shape[0] > 0: self.kf.R = R
def initialize(self, observation):
raise NotImplementedError
def predict(self, ):
self.kf.predict()
def update(self, observation, **kwargs):
self.kf.update(observation, **kwargs)
def get_state(self, ):
return self.kf.x
def gating_distance(self, measurements, only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
measurements : ndarray
An Nx4 dimensional matrix of N measurements, note the format (whether xywh or xyah or others)
should be identical to state definition
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
# map state space to measurement space
mean = self.kf.x.copy()
mean = np.dot(self.kf.H, mean)
covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
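# --- Worked example (illustrative) ---
# With the full 4-dof measurement, the 0.95 chi-square threshold is
# chi2inv95[4] = 9.4877 (see matching.py); associations whose squared
# Mahalanobis distance exceeds it are treated as infeasible:
#
# kf = SomeKalman() # hypothetical concrete subclass of BaseKalman
# kf.initialize(np.array([10., 20., 0.5, 50.]))
# d2 = kf.gating_distance(measurements) # shape (N,)
# feasible = d2 <= 9.4877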

View File

@@ -0,0 +1,99 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
import cv2
class BotKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, w, h, vx, vy, vw, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-w-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[2], # related to h
2 * self._std_weight_position * observation[3],
2 * self._std_weight_position * observation[2],
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-a-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)
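# --- Usage sketch (illustrative) ---
# One predict/update cycle with x-y-w-h observations:
#
# kf = BotKalman()
# kf.initialize(np.array([100., 50., 30., 60.])) # x, y, w, h
# kf.predict() # x_{1, 0}, P_{1, 0}
# kf.update(np.array([102., 51., 31., 59.])) # fold in the new detection
# state = kf.get_state() # 8-dim [x, y, w, h, vx, vy, vw, vh]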

View File

@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np
class ByteKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-a-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,144 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class OCSORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def _freeze(self, ):
""" freeze all the param of Kalman
"""
self.attr_saved = deepcopy(self.kf.__dict__)
def _unfreeze(self, ):
""" when observe an lost object again, use the virtual trajectory
"""
if self.attr_saved is not None:
new_history = deepcopy(self.history_obs)
self.kf.__dict__ = self.attr_saved
self.history_obs = self.history_obs[:-1]
occur = [int(d is None) for d in new_history]
indices = np.where(np.array(occur)==0)[0]
index1 = indices[-2]
index2 = indices[-1]
box1 = new_history[index1]
x1, y1, s1, r1 = box1
w1 = np.sqrt(s1 * r1)
h1 = np.sqrt(s1 / r1)
box2 = new_history[index2]
x2, y2, s2, r2 = box2
w2 = np.sqrt(s2 * r2)
h2 = np.sqrt(s2 / r2)
time_gap = index2 - index1
dx = (x2-x1)/time_gap
dy = (y2-y1)/time_gap
dw = (w2-w1)/time_gap
dh = (h2-h1)/time_gap
for i in range(index2 - index1):
"""
The default virtual trajectory generation is by linear
motion (constant speed hypothesis), you could modify this
part to implement your own.
"""
x = x1 + (i+1) * dx
y = y1 + (i+1) * dy
w = w1 + (i+1) * dw
h = h1 + (i+1) * dh
s = w * h
r = w / float(h)
new_box = np.array([x, y, s, r]).reshape((4, 1))
"""
I still use predict-update loop here to refresh the parameters,
but this can be faster by directly modifying the internal parameters
as suggested in the paper. I keep this naive but slow way for
readability and ease of understanding
"""
self.kf.update(new_box)
if not i == (index2-index1-1):
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly, since OC-SORT changes the internals of the Kalman filter
Args:
z: observation x-y-s-a format
"""
self.history_obs.append(z)
if z is None:
if self.observed:
self._freeze()
self.observed = False
self.kf.update(z)
else:
if not self.observed: # Get observation, use online smoothing to re-update parameters
self._unfreeze()
self.kf.update(z)
self.observed = True
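# --- Note on the freeze/unfreeze logic above (illustrative) ---
# update(None) freezes the filter when the target is lost; once it is observed
# again, _unfreeze() replays a linearly interpolated virtual trajectory between
# the last two real observations before applying the new one, e.g.:
#
# kf = OCSORTKalman()
# kf.initialize(np.array([10., 10., 400., 1.])) # x, y, s, a
# kf.predict(); kf.update(np.array([11., 10., 400., 1.]))
# kf.predict(); kf.update(None) # occluded frame -> freeze
# kf.predict(); kf.update(np.array([14., 10., 400., 1.])) # re-observed -> unfreeze + replay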

View File

@@ -0,0 +1,73 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class SORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly, since OC-SORT changes the internals of the Kalman filter
Args:
z: observation x-y-s-a format
"""
self.kf.update(z)

View File

@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np
class NSAKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z, score):
""" update step
Args:
z: observation x-y-a-h format
score: the detection score/confidence required by NSA kalman
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
# NSA
std = [(1. - score) * x for x in std]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np
class UCMCKalman(BaseKalman):
def __init__(self, ):
state_dim = 8
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160

View File

@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
def merge_matches(m1, m2, shape):
O,P,Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1*M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
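# --- Usage sketch (illustrative) ---
# lap.lapjv solves the assignment problem; `thresh` acts as a cost limit so
# pairs more expensive than it stay unmatched:
#
# cost = np.array([[0.2, 0.9],
# [0.8, 0.1]])
# matches, u_rows, u_cols = linear_assignment(cost, thresh=0.5)
# # matches -> [[0, 0], [1, 1]]; u_rows and u_cols are empty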
def ious(atlbrs, btlbrs):
"""
Compute cost based on IoU
:type atlbrs: list[tlbr] | np.ndarray
:type btlbrs: list[tlbr] | np.ndarray
:rtype ious np.ndarray
"""
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
if ious.size == 0:
return ious
ious = bbox_ious(
np.ascontiguousarray(atlbrs, dtype=float),
np.ascontiguousarray(btlbrs, dtype=float)
)
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def v_iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='cosine'):
"""
:param tracks: list[STrack]
:param detections: list[BaseTrack]
:param metric:
:return: cost_matrix np.ndarray
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
#for i, track in enumerate(tracks):
#cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # normalized features
return cost_matrix
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position, metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
return cost_matrix
def fuse_iou(cost_matrix, tracks, detections):
if cost_matrix.size == 0:
return cost_matrix
reid_sim = 1 - cost_matrix
iou_dist = iou_distance(tracks, detections)
iou_sim = 1 - iou_dist
fuse_sim = reid_sim * (1 + iou_sim) / 2
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
#fuse_sim = fuse_sim * (1 + det_scores) / 2
fuse_cost = 1 - fuse_sim
return fuse_cost
def fuse_score(cost_matrix, detections):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
fuse_sim = iou_sim * det_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
def greedy_assignment_iou(dist, thresh):
matched_indices = []
if dist.shape[1] == 0:
return np.array(matched_indices, np.int32).reshape(-1, 2)
for i in range(dist.shape[0]):
j = dist[i].argmin()
if dist[i][j] < thresh:
dist[:, j] = 1.
matched_indices.append([j, i])
return np.array(matched_indices, np.int32).reshape(-1, 2)
def greedy_assignment(dists, threshs):
matches = greedy_assignment_iou(dists.T, threshs)
u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
return matches, u_track, u_det
def fuse_score_matrix(cost_matrix, detections, tracks):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
trk_scores = np.array([trk.score for trk in tracks])
trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
mid_scores = (det_scores + trk_scores) / 2
fuse_sim = iou_sim * mid_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
"""
calculate buffered IoU, used in C_BIoU_Tracker
"""
def buffered_iou_distance(atracks, btracks, level=1):
"""
atracks: list[C_BIoUSTrack], tracks
btracks: list[C_BIoUSTrack], detections
level: cascade level, 1 or 2
"""
assert level in [1, 2], 'level must be 1 or 2'
if level == 1: # use motion_state1(tracks) and buffer_bbox1(detections) to calculate
atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
else:
atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
"""
observation centric association, with velocity, for OC Sort
"""
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):
if(len(tracklets) == 0):
return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))
# get numpy format bboxes
trk_tlbrs = np.array([track.tlbr for track in tracklets])
det_tlbrs = np.array([det.tlbr for det in detections])
det_scores = np.array([det.score for det in detections])
iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)
Y, X = speed_direction_batch(det_tlbrs, previous_obs)
inertia_Y, inertia_X = velocities[:,0], velocities[:,1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost * scores.T
matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9)
return matches, unmatched_a, unmatched_b
"""
helper func of observation_centric_association
"""
def speed_direction_batch(dets, tracks):
tracks = tracks[..., np.newaxis]
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
dx = CX2 - CX1
dy = CY2 - CY1
norm = np.sqrt(dx**2 + dy**2) + 1e-6
dx = dx / norm
dy = dy / norm
return dy, dx # size: num_track x num_det
def matching_cascade(
distance_metric, matching_thresh, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""
Run matching cascade in DeepSORT
distance_metric: function that calculates the cost matrix
matching_thresh: float, associations with cost larger than this value are disregarded
cascade_depth: int, equal to max_age of a tracklet
tracks: List[STrack], current tracks
detections: List[STrack], current detections
track_indices: List[int], tracks that will be calculated, Default None
detection_indices: List[int], detections that will be calculated, Default None
return:
matched pairs, unmatched tracks, unmatched detections: List[int], List[int], List[int]
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
detections_to_match = detection_indices
matches = []
for level in range(cascade_depth):
"""
match new track with detection firstly
"""
if not len(detections_to_match): # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
] # filter tracks whose age is equal to level + 1 (The age of Newest track = 1)
if not len(track_indices_l): # Nothing to match at this level
continue
# tracks and detections which will be matched in the current level
track_l = [tracks[idx] for idx in track_indices_l] # List[STrack]
det_l = [detections[idx] for idx in detections_to_match] # List[STrack]
# calculate the cost matrix
cost_matrix = distance_metric(track_l, det_l)
# solve the linear assignment problem
matched_row_col, unmatched_row, unmatched_col = \
linear_assignment(cost_matrix, matching_thresh)
for row, col in matched_row_col: # for those who matched
matches.append((track_indices_l[row], detections_to_match[col]))
unmatched_detection_l = [] # current detections not matched
for col in unmatched_col: # for detections not matched
unmatched_detection_l.append(detections_to_match[col])
detections_to_match = unmatched_detection_l # update detections to match for next level
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, detections_to_match
def nearest_embedding_distance(tracks, detections, metric='cosine'):
"""
different from embedding_distance, this function calculates the
nearest distance among all track history features and the detections
tracks: list[STrack]
detections: list[STrack]
metric: str, cosine or euclidean
TODO: support euclidean distance
return:
cost_matrix, np.ndarray, shape(len(tracks), len(detections))
"""
cost_matrix = np.zeros((len(tracks), len(detections)))
det_features = np.asarray([det.features[-1] for det in detections])
for row, track in enumerate(tracks):
track_history_features = np.asarray(track.features)
dist = 1. - cal_cosine_distance(track_history_features, det_features)
dist = dist.min(axis=0)
cost_matrix[row, :] = dist
return cost_matrix
def cal_cosine_distance(mat1, mat2):
"""
simple function to compute the cosine similarity between two matrices (callers convert to distance via 1 - similarity)
:param mat1: np.ndarray, shape(M, dim)
:param mat2: np.ndarray, shape(N, dim)
:return: np.ndarray, shape(M, N)
"""
# result = mat1·mat2^T / |mat1|·|mat2|
# norm mat1 and mat2
mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)
return np.dot(mat1, mat2.T)
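# --- Usage sketch (illustrative) ---
# Despite its name, cal_cosine_distance returns cosine similarity; callers such
# as nearest_embedding_distance convert it to a distance with 1 - similarity:
#
# a = np.random.rand(3, 128) # 3 track features
# b = np.random.rand(5, 128) # 5 detection features
# sim = cal_cosine_distance(a, b) # (3, 5) similarities in [-1, 1]
# dist = 1. - sim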

View File

@@ -0,0 +1,237 @@
"""
OC Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_velocity
from .matching import *
from cython_bbox import bbox_overlaps as bbox_ious
class OCSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.delta_t = 3
@staticmethod
def k_previous_obs(observations, cur_age, k):
if len(observations) == 0:
return [-1, -1, -1, -1, -1]
for i in range(k):
dt = k - i
if cur_age - dt in observations:
return observations[cur_age-dt]
max_age = max(observations.keys())
return observations[max_age]
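# --- Worked example for k_previous_obs (illustrative) ---
# With observations = {3: boxA, 5: boxB}, cur_age = 7 and k = self.delta_t = 3,
# the loop probes ages 4, 5, 6 and returns boxB (age 5); if no probed age is
# stored, it falls back to the most recent observation, and with an empty dict
# it returns the invalid box [-1, -1, -1, -1, -1].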
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, Observation Centric Momentum'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
velocities = np.array(
[trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])
# last observation, observation-centric
# last_boxes = np.array([trk.last_observation for trk in tracklet_pool])
# historical observations
k_observations = np.array(
[self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Observation centric cost matrix and assignment
matches, u_track, u_detection = observation_centric_association(
tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# association the untrack to the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
# for unmatched tracks in the first round, use the last observation
r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detections_second_bbox = [det.tlbr for det in detections_second]
dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,98 @@
"""
AFLink code in StrongSORT (StrongSORT: Make DeepSORT Great Again, arXiv)
copied from the original repo
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class TemporalBlock(nn.Module):
def __init__(self, cin, cout):
super(TemporalBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
self.relu = nn.ReLU(inplace=True)
self.bnf = nn.BatchNorm1d(cout)
self.bnx = nn.BatchNorm1d(cout)
self.bny = nn.BatchNorm1d(cout)
def bn(self, x):
x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
x[:, :, :, 2] = self.bny(x[:, :, :, 2])
return x
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class FusionBlock(nn.Module):
def __init__(self, cin, cout):
super(FusionBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
self.bn = nn.BatchNorm2d(cout)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Classifier(nn.Module):
def __init__(self, cin):
super(Classifier, self).__init__()
self.fc1 = nn.Linear(cin*2, cin//2)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Linear(cin//2, 2)
def forward(self, x1, x2):
x = torch.cat((x1, x2), dim=1)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
class PostLinker(nn.Module):
def __init__(self):
super(PostLinker, self).__init__()
self.TemporalModule_1 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.TemporalModule_2 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.FusionBlock_1 = FusionBlock(256, 256)
self.FusionBlock_2 = FusionBlock(256, 256)
self.pooling = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = Classifier(256)
def forward(self, x1, x2):
x1 = x1[:, :, :, :3]
x2 = x2[:, :, :, :3]
x1 = self.TemporalModule_1(x1) # [B,1,30,3] -> [B,256,6,3]
x2 = self.TemporalModule_2(x2)
x1 = self.FusionBlock_1(x1)
x2 = self.FusionBlock_2(x2)
x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
y = self.classifier(x1, x2)
if not self.training:
y = torch.softmax(y, dim=1)
return y
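# --- Usage sketch (illustrative) ---
# AFLink scores whether two tracklets belong to the same identity from their
# length-30 (frame, x, y) sequences; extra columns beyond 3 are sliced off in forward():
#
# model = PostLinker().eval()
# t1 = torch.rand(1, 1, 30, 3) # [B, 1, 30, (f, x, y)]
# t2 = torch.rand(1, 1, 30, 3)
# prob = model(t1, t2) # (1, 2) class probabilities after softmax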

View File

@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F
__all__ = [
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]
pretrained_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None:
num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm:
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = None
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None:
x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
if self.gate_activation is not None:
x = self.gate_activation(x)
if self.return_gates:
return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2a = LightConv3x3(mid_channels, mid_channels)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
self.IN = None
if IN:
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return F.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. TPAMI, 2021.
"""
def __init__(
self,
num_classes,
blocks,
layers,
channels,
feature_dim=512,
loss='softmax',
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
self.loss = loss
self.feature_dim = feature_dim
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3])
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# fully connected layer
self.fc = self._construct_fc_layer(
self.feature_dim, channels[3], dropout_p=None
)
# identity classification layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels),
nn.AvgPool2d(2, stride=2)
)
)
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
if fc_dims is None or fc_dims < 0:
self.feature_dim = input_dim
return None
if isinstance(fc_dims, int):
fc_dims = [fc_dims]
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def forward(self, x, return_featuremaps=False):
x = self.featuremaps(x)
if return_featuremaps:
return x
v = self.global_avgpool(x)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights from "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(cached_file)
)
else:
print(
'Successfully loaded imagenet pretrained weights from "{}"'.
format(cached_file)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# standard size (width x1.0)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x1_0')
return model
def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# medium size (width x0.75)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[48, 192, 288, 384],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_75')
return model
def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# tiny size (width x0.5)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[32, 128, 192, 256],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_5')
return model
def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# very tiny size (width x0.25)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[16, 64, 96, 128],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_25')
return model
def osnet_ibn_x1_0(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
# standard size (width x1.0) + IBN layer
# Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ibn_x1_0')
return model
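# --- Illustrative usage sketch (not part of the original file) ---
# A minimal example of extracting re-ID embeddings with one of the factory
# functions above. Batch size, crop size and the chosen variant are assumptions
# for illustration; in eval mode forward() returns the embedding vector.
if __name__ == '__main__':
    import torch
    model = osnet_x0_25(num_classes=1, pretrained=False)
    model.eval()
    with torch.no_grad():
        crops = torch.randn(8, 3, 256, 128)   # (B, C, H, W) person crops
        feats = model(crops)                  # embeddings, shape (8, 512)
    print(feats.shape)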

View File

@@ -0,0 +1,3 @@
"""
file for reid_models folder
"""

View File

@@ -0,0 +1,157 @@
"""
file for DeepSORT Re-ID model
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out, is_downsample=False):
super(BasicBlock, self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self, x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y), True)
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i == 0:
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
else:
blocks += [BasicBlock(c_out, c_out), ]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751, reid=False):
super(Net, self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3, 64, 3, stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
# nn.Conv2d(32,32,3,stride=1,padding=1),
# nn.BatchNorm2d(32),
# nn.ReLU(inplace=True),
nn.MaxPool2d(3, 2, padding=1),
)
# 32 64 32
self.layer1 = make_layers(64, 64, 2, False)
# 32 64 32
self.layer2 = make_layers(64, 128, 2, True)
# 64 32 16
self.layer3 = make_layers(128, 256, 2, True)
# 128 16 8
self.layer4 = make_layers(256, 512, 2, True)
# 256 8 4
self.avgpool = nn.AvgPool2d((8, 4), 1)
# 256 1 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
# B x 128
if self.reid:
x = x.div(x.norm(p=2, dim=1, keepdim=True))
return x
# classifier
x = self.classifier(x)
return x
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=torch.device(self.device))[
'net_dict']
self.net.load_state_dict(state_dict)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
4. to torch Tensor
5. normalize
"""
def _resize(im, size):
try:
return cv2.resize(im.astype(np.float32) / 255., size)
except Exception:
print('Error: bbox crop has zero size, shape = ', im.shape)
exit(1)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
if isinstance(im_crops, list):
im_batch = self._preprocess(im_crops)
else:
im_batch = im_crops
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features
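# --- Illustrative usage sketch (not part of the original file) ---
# How the Extractor might be called on a list of cropped detections. The
# checkpoint path 'deepsort_ckpt.t7' is a hypothetical placeholder; the crops
# are random arrays standing in for BGR image patches.
if __name__ == '__main__':
    extractor = Extractor('deepsort_ckpt.t7', use_cuda=False)   # hypothetical weight file
    crops = [np.random.randint(0, 255, (100, 50, 3), dtype=np.uint8) for _ in range(4)]
    feats = extractor(crops)    # torch.Tensor of shape (4, 512), L2-normalized rows
    print(feats.shape)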

View File

@@ -0,0 +1,273 @@
"""
load checkpoint file
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
"""
from __future__ import division, print_function, absolute_import
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn
__all__ = [
'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
'open_all_layers', 'open_specified_layers', 'count_num_param',
'load_pretrained_weights'
]
def load_checkpoint(fpath):
r"""Loads checkpoint.
``UnicodeDecodeError`` can be well handled, which means
python2-saved files can be read from python3.
Args:
fpath (str): path to checkpoint.
Returns:
dict
Examples::
>>> from torchreid.utils import load_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> checkpoint = load_checkpoint(fpath)
"""
if fpath is None:
raise ValueError('File path is None')
fpath = osp.abspath(osp.expanduser(fpath))
if not osp.exists(fpath):
raise FileNotFoundError('File is not found at "{}"'.format(fpath))
map_location = None if torch.cuda.is_available() else 'cpu'
try:
checkpoint = torch.load(fpath, map_location=map_location)
except UnicodeDecodeError:
pickle.load = partial(pickle.load, encoding="latin1")
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
checkpoint = torch.load(
fpath, pickle_module=pickle, map_location=map_location
)
except Exception:
print('Unable to load checkpoint from "{}"'.format(fpath))
raise
return checkpoint
def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
r"""Resumes training from a checkpoint.
This will load (1) model weights and (2) ``state_dict``
of optimizer if ``optimizer`` is not None.
Args:
fpath (str): path to checkpoint.
model (nn.Module): model.
optimizer (Optimizer, optional): an Optimizer.
scheduler (LRScheduler, optional): an LRScheduler.
Returns:
int: start_epoch.
Examples::
>>> from torchreid.utils import resume_from_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> start_epoch = resume_from_checkpoint(
>>> fpath, model, optimizer, scheduler
>>> )
"""
print('Loading checkpoint from "{}"'.format(fpath))
checkpoint = load_checkpoint(fpath)
model.load_state_dict(checkpoint['state_dict'])
print('Loaded model weights')
if optimizer is not None and 'optimizer' in checkpoint.keys():
optimizer.load_state_dict(checkpoint['optimizer'])
print('Loaded optimizer')
if scheduler is not None and 'scheduler' in checkpoint.keys():
scheduler.load_state_dict(checkpoint['scheduler'])
print('Loaded scheduler')
start_epoch = checkpoint['epoch']
print('Last epoch = {}'.format(start_epoch))
if 'rank1' in checkpoint.keys():
print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
return start_epoch
def adjust_learning_rate(
optimizer,
base_lr,
epoch,
stepsize=20,
gamma=0.1,
linear_decay=False,
final_lr=0,
max_epoch=100
):
r"""Adjusts learning rate.
Deprecated.
"""
if linear_decay:
# linearly decay learning rate from base_lr to final_lr
frac_done = epoch / max_epoch
lr = frac_done*final_lr + (1.-frac_done) * base_lr
else:
# decay learning rate by gamma for every stepsize
lr = base_lr * (gamma**(epoch // stepsize))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_bn_to_eval(m):
r"""Sets BatchNorm layers to eval mode."""
# 1. no update for running mean and var
# 2. scale and shift parameters are still trainable
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
m.eval()
def open_all_layers(model):
r"""Opens all layers in model for training.
Examples::
>>> from torchreid.utils import open_all_layers
>>> open_all_layers(model)
"""
model.train()
for p in model.parameters():
p.requires_grad = True
def open_specified_layers(model, open_layers):
r"""Opens specified layers in model for training while keeping
other layers frozen.
Args:
model (nn.Module): neural net model.
open_layers (str or list): layers open for training.
Examples::
>>> from torchreid.utils import open_specified_layers
>>> # Only model.classifier will be updated.
>>> open_layers = 'classifier'
>>> open_specified_layers(model, open_layers)
>>> # Only model.fc and model.classifier will be updated.
>>> open_layers = ['fc', 'classifier']
>>> open_specified_layers(model, open_layers)
"""
if isinstance(model, nn.DataParallel):
model = model.module
if isinstance(open_layers, str):
open_layers = [open_layers]
for layer in open_layers:
assert hasattr(
model, layer
), '"{}" is not an attribute of the model, please provide the correct name'.format(
layer
)
for name, module in model.named_children():
if name in open_layers:
module.train()
for p in module.parameters():
p.requires_grad = True
else:
module.eval()
for p in module.parameters():
p.requires_grad = False
def count_num_param(model):
r"""Counts number of parameters in a model while ignoring ``self.classifier``.
Args:
model (nn.Module): network model.
Examples::
>>> from torchreid.utils import count_num_param
>>> model_size = count_num_param(model)
.. warning::
This method is deprecated in favor of
``torchreid.utils.compute_model_complexity``.
"""
warnings.warn(
'This method is deprecated and will be removed in the future.'
)
num_param = sum(p.numel() for p in model.parameters())
if isinstance(model, nn.DataParallel):
model = model.module
if hasattr(model,
'classifier') and isinstance(model.classifier, nn.Module):
# we ignore the classifier because it is unused at test time
num_param -= sum(p.numel() for p in model.classifier.parameters())
return num_param
def load_pretrained_weights(model, weight_path):
r"""Loads pretrianed weights to model.
Features::
- Incompatible layers (unmatched in name or size) will be ignored.
- Can automatically deal with keys containing "module.".
Args:
model (nn.Module): network model.
weight_path (str): path to pretrained weights.
Examples::
>>> from torchreid.utils import load_pretrained_weights
>>> weight_path = 'log/my_model/model-best.pth.tar'
>>> load_pretrained_weights(model, weight_path)
"""
checkpoint = load_checkpoint(weight_path)
if 'state_dict' in checkpoint:
state_dict = checkpoint['state_dict']
else:
state_dict = checkpoint
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(weight_path)
)
else:
print(
'Successfully loaded pretrained weights from "{}"'.
format(weight_path)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)

View File

@@ -0,0 +1,169 @@
"""
Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class SortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 3: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 4: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
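# --- Illustrative usage sketch (not part of the original file) ---
# Driving the tracker frame by frame. The namespace mirrors the attributes this
# class reads (conf_thresh, track_buffer, kalman_format); the detection layout
# (x1, y1, x2, y2, score, class) follows how update() slices output_results.
# All values below are made up for illustration.
if __name__ == '__main__':
    from types import SimpleNamespace
    args = SimpleNamespace(conf_thresh=0.5, track_buffer=30, kalman_format='sort')
    tracker = SortTracker(args, frame_rate=30)
    dummy_img = np.zeros((720, 1280, 3), dtype=np.uint8)
    dets = np.array([[100., 100., 150., 200., 0.9, 0.],
                     [300., 120., 360., 240., 0.8, 0.]])
    for _ in range(3):   # pretend the same detections arrive for 3 frames
        online = tracker.update(dets, dummy_img, dummy_img)
        print([(t.track_id, t.score) for t in online])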

View File

@@ -0,0 +1,338 @@
"""
Bot sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_depth
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class SparseTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
def get_deep_range(self, obj, step):
col = []
for t in obj:
lend = (t.deep_vec)[2]
col.append(lend)
max_len, mix_len = max(col), min(col)
if max_len != mix_len:
deep_range =np.arange(mix_len, max_len, (max_len - mix_len + 1) / step)
if deep_range[-1] < max_len:
deep_range = np.concatenate([deep_range, np.array([max_len],)])
deep_range[0] = np.floor(deep_range[0])
deep_range[-1] = np.ceil(deep_range[-1])
else:
deep_range = [mix_len,]
mask = self.get_sub_mask(deep_range, col)
return mask
def get_sub_mask(self, deep_range, col):
mix_len=deep_range[0]
max_len=deep_range[-1]
if max_len == mix_len:
lc = mix_len
mask = []
for d in deep_range:
if d > deep_range[0] and d < deep_range[-1]:
mask.append((col >= lc) & (col < d))
lc = d
elif d == deep_range[-1]:
mask.append((col >= lc) & (col <= d))
lc = d
else:
lc = d
continue
return mask
# core function
def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
if len(detections) > 0:
det_mask = self.get_deep_range(detections, levels)
else:
det_mask = []
if len(tracks)!=0:
track_mask = self.get_deep_range(tracks, levels)
else:
track_mask = []
u_detection, u_tracks, res_det, res_track = [], [], [], []
if len(track_mask) != 0:
if len(track_mask) < len(det_mask):
for i in range(len(det_mask) - len(track_mask)):
idx = np.argwhere(det_mask[len(track_mask) + i] == True)
for idd in idx:
res_det.append(detections[idd[0]])
elif len(track_mask) > len(det_mask):
for i in range(len(track_mask) - len(det_mask)):
idx = np.argwhere(track_mask[len(det_mask) + i] == True)
for idd in idx:
res_track.append(tracks[idd[0]])
for dm, tm in zip(det_mask, track_mask):
det_idx = np.argwhere(dm == True)
trk_idx = np.argwhere(tm == True)
# search det
det_ = []
for idd in det_idx:
det_.append(detections[idd[0]])
det_ = det_ + u_detection
# search trk
track_ = []
for idt in trk_idx:
track_.append(tracks[idt[0]])
# update trk
track_ = track_ + u_tracks
dists = iou_distance(track_, det_)
matches, u_track_, u_det_ = linear_assignment(dists, thresh)
for itracked, idet in matches:
track = track_[itracked]
det = det_[idet]
if track.state == TrackState.Tracked:
track.update(det_[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
u_tracks = [track_[t] for t in u_track_]
u_detection = [det_[t] for t in u_det_]
u_tracks = u_tracks + res_track
u_detection = u_detection + res_det
else:
u_detection = detections
return activated_tracklets, refind_tracklets, u_tracks, u_detection
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Step 1: Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes, depth cascade matching'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
# depth cascade matching
activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
detections,
tracklet_pool,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.75,
is_fuse=True)
''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]
activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
detections_second,
r_tracked_tracklets,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.3,
is_fuse=False)
for track in u_track:
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = u_detection_high
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
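# --- Illustrative sketch of the pseudo-depth binning behind DCM (not part of the original file) ---
# get_deep_range() splits tracks/detections into `levels` bins by pseudo-depth
# (deep_vec[2] = 2000 - y2) and DCM then associates bin by bin, passing the
# unmatched ones down to the next bin. The standalone code below reproduces
# only the binning step on made-up depth values.
if __name__ == '__main__':
    depths = np.array([150., 400., 420., 900., 1300.])   # assumed pseudo-depths
    levels = 3
    lo, hi = depths.min(), depths.max()
    edges = np.arange(lo, hi, (hi - lo + 1) / levels)
    if edges[-1] < hi:
        edges = np.concatenate([edges, np.array([hi])])
    edges[0], edges[-1] = np.floor(edges[0]), np.ceil(edges[-1])
    lc = edges[0]
    for i, d in enumerate(edges[1:]):
        in_bin = (depths >= lc) & ((depths <= d) if d == edges[-1] else (depths < d))
        print('bin', i, '->', depths[in_bin])
        lc = d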

View File

@@ -0,0 +1,327 @@
"""
Deep Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class StrongSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (h, w, c) -> (c, h, w), float in [0, 1]
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
self.lambda_ = 0.98 # coefficient mixing appearance cost and Kalman gating distance (eq. 10 in the paper)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, h, w), where (w, h) = self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# vanilla matching (appearance cost gated by Kalman state)
cost_matrix = self.gated_metric(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get cost matrix: first compute the appearance cost, then gate it by the Kalman state.
tracks: List[STrack]
dets: List[STrack]
"""
apperance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(apperance_dist, tracks, dets, )
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by the Kalman state distance, constrained by the
0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as state vector or only use [xc, yc]
return:
updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
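# --- Illustrative sketch of the gating/mixing in gate_cost_matrix (not part of the original file) ---
# Appearance costs above max_apperance_thresh, and pairs whose Kalman gating
# distance exceeds the chi-square threshold, are set to a large constant; the
# remaining entries are blended as lambda * appearance + (1 - lambda) * gating
# distance. All numbers below are made up.
if __name__ == '__main__':
    appearance = np.array([[0.05, 0.30],
                           [0.10, 0.08]])     # cosine distances, tracks x detections
    gating = np.array([[3.0, 50.0],
                       [40.0, 5.0]])          # squared Mahalanobis distances
    lam, gated_cost, chi2_thresh = 0.98, 1e5, 9.4877   # chi2inv95 for 4 dof
    cost = appearance.copy()
    cost[cost > 0.15] = gated_cost            # appearance gate
    cost[gating > chi2_thresh] = gated_cost   # motion gate
    cost = lam * cost + (1 - lam) * gating    # blended association cost
    print(cost)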

View File

@@ -0,0 +1,366 @@
"""
implements base elements of trajectory
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman
MOTION_MODEL_DICT = {
'sort': SORTKalman,
'byte': ByteKalman,
'bot': BotKalman,
'ocsort': OCSORTKalman,
'strongsort': NSAKalman,
}
STATE_CONVERT_DICT = {
'sort': 'xysa',
'byte': 'xyah',
'bot': 'xywh',
'ocsort': 'xysa',
'strongsort': 'xyah'
}
class Tracklet(BaseTrack):
def __init__(self, tlwh, score, category, motion='byte'):
# initial position
self._tlwh = np.asarray(tlwh, dtype=float)
self.is_activated = False
self.score = score
self.category = category
# kalman
self.motion = motion
self.kalman_filter = MOTION_MODEL_DICT[motion]()
self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])
# init kalman
self.kalman_filter.initialize(self.convert_func(self._tlwh))
def predict(self):
self.kalman_filter.predict()
self.time_since_update += 1
def activate(self, frame_id):
self.track_id = self.next_id()
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()
def xyah_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def xywh_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
def xysa_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] = np.sqrt(x[2] * x[3])
ret[3] = x[2] / ret[2]
ret[:2] -= ret[2:] / 2
return ret
class Tracklet_w_reid(Tracklet):
"""
Tracklet class with reid features, for botsort, deepsort, etc.
"""
def __init__(self, tlwh, score, category, motion='byte',
feat=None, feat_history=50):
super().__init__(tlwh, score, category, motion)
self.smooth_feat = None # EMA feature
self.curr_feat = None # current feature
self.features = deque([], maxlen=feat_history) # all features
if feat is not None:
self.update_features(feat)
self.alpha = 0.9
def update_features(self, feat):
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
else:
self.kalman_filter.update(self.convert_func(new_track.tlwh))
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
else:
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.time_since_update = 0
class Tracklet_w_velocity(Tracklet):
"""
Tracklet class with reid features, for ocsort.
"""
def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
super().__init__(tlwh, score, category, motion)
self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
self.observations = dict()
self.history_observations = []
self.velocity = None
self.delta_t = delta_t
self.age = 0 # mark the age
@staticmethod
def speed_direction(bbox1, bbox2):
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
speed = np.array([cy2 - cy1, cx2 - cx1])
norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
return speed / norm
def predict(self):
self.kalman_filter.predict()
self.age += 1
self.time_since_update += 1
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
# update velocity and history buffer
new_tlbr = Tracklet_w_bbox_buffer.tlwh_to_tlbr(new_tlwh)
if self.last_observation.sum() >= 0: # a previous observation exists
previous_box = None
for i in range(self.delta_t):
dt = self.delta_t - i
if self.age - dt in self.observations:
previous_box = self.observations[self.age-dt]
break
if previous_box is None:
previous_box = self.last_observation
"""
Estimate the track speed direction with observations \Delta t steps away
"""
self.velocity = self.speed_direction(previous_box, new_tlbr)
new_observation = np.r_[new_tlbr, new_track.score]
self.last_observation = new_observation
self.observations[self.age] = new_observation
self.history_observations.append(new_observation)
class Tracklet_w_bbox_buffer(Tracklet):
"""
Tracklet class with buffer of bbox, for C_BIoU track.
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
# params in motion state
self.b1, self.b2, self.n = 0.3, 0.5, 5
self.origin_bbox_buffer = deque() # a deque storing the original bboxes (tlwh) from t - self.n to t, where t is the last detected frame
self.origin_bbox_buffer.append(self._tlwh)
# buffered bbox, two buffer sizes
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
# motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def get_buffer_bbox(self, level=1, bbox=None):
"""
get buffered bbox as: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
level = 1: b = self.b1 level = 2: b = self.b2
bbox: if not None, use bbox to calculate buffer_bbox, else use self._tlwh
"""
assert level in [1, 2], 'level must be 1 or 2'
b = self.b1 if level == 1 else self.b2
if bbox is None:
buffer_bbox = self._tlwh + np.array([-b*self._tlwh[2], -b*self._tlwh[3], 2*b*self._tlwh[2], 2*b*self._tlwh[3]])
else:
buffer_bbox = bbox + np.array([-b*bbox[2], -b*bbox[3], 2*b*bbox[2], 2*b*bbox[3]])
return np.maximum(0.0, buffer_bbox)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self._tlwh = new_track._tlwh
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(self._tlwh)
else:
self.origin_bbox_buffer.append(self._tlwh)
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
unmatched_frames = self.time_since_update  # frames since the last matched detection
self.time_since_update = 0
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(new_tlwh)
else:
self.origin_bbox_buffer.append(new_tlwh)
# update motion state
if unmatched_frames: # have some unmatched frames
if len(self.origin_bbox_buffer) < self.n:
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
else: # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
motion_state = self.origin_bbox_buffer[-1] + \
(unmatched_frames / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)
else: # no unmatched frames, use current detection as motion state
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
class Tracklet_w_depth(Tracklet):
"""
tracklet with depth info (i.e., 2000 - y2), for SparseTrack
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
@property
# @jit(nopython=True)
def deep_vec(self):
"""Convert bounding box to format `((top left, bottom right)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
cx = ret[0] + 0.5 * ret[2]
y2 = ret[1] + ret[3]
length = 2000 - y2
return np.asarray([cx, y2, length], dtype=float)
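# --- Illustrative sketch of the C-BIoU buffered box (not part of the original file) ---
# Tracklet_w_bbox_buffer.get_buffer_bbox() grows an (x, y, w, h) box by a
# fraction b of its own size on every side: (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h),
# clipped at 0. The numbers below are made up to show the two buffer levels.
if __name__ == '__main__':
    def _buffered(tlwh, b):
        tlwh = np.asarray(tlwh, dtype=float)
        grown = tlwh + np.array([-b * tlwh[2], -b * tlwh[3], 2 * b * tlwh[2], 2 * b * tlwh[3]])
        return np.maximum(0.0, grown)
    box = [100., 50., 40., 80.]
    print(_buffered(box, 0.3))   # [ 88.  26.  64. 128.]
    print(_buffered(box, 0.5))   # [ 80.  10.  80. 160.]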

View File

@@ -0,0 +1,5 @@
from .eval import Evaluator
from . import datasets
from . import metrics
from . import plotting
from . import utils

View File

@@ -0,0 +1,65 @@
from functools import wraps
from time import perf_counter
import inspect
DO_TIMING = False
DISPLAY_LESS_PROGRESS = False
timer_dict = {}
counter = 0
def time(f):
@wraps(f)
def wrap(*args, **kw):
if DO_TIMING:
# Run function with timing
ts = perf_counter()
result = f(*args, **kw)
te = perf_counter()
tt = te-ts
# Get function name
arg_names = inspect.getfullargspec(f)[0]
if arg_names[0] == 'self' and DISPLAY_LESS_PROGRESS:
return result
elif arg_names[0] == 'self':
method_name = type(args[0]).__name__ + '.' + f.__name__
else:
method_name = f.__name__
# Record accumulative time in each function for analysis
if method_name in timer_dict.keys():
timer_dict[method_name] += tt
else:
timer_dict[method_name] = tt
# If code is finished, display timing summary
if method_name == "Evaluator.evaluate":
print("")
print("Timing analysis:")
for key, value in timer_dict.items():
print('%-70s %2.4f sec' % (key, value))
else:
# Get function argument values for printing special arguments of interest
arg_titles = ['tracker', 'seq', 'cls']
arg_vals = []
for i, a in enumerate(arg_names):
if a in arg_titles:
arg_vals.append(args[i])
arg_text = '(' + ', '.join(arg_vals) + ')'
# Display methods and functions with different indentation.
if arg_names[0] == 'self':
print('%-74s %2.4f sec' % (' '*4 + method_name + arg_text, tt))
elif arg_names[0] == 'test':
pass
else:
global counter
counter += 1
print('%i %-70s %2.4f sec' % (counter, method_name + arg_text, tt))
return result
else:
# If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
return f(*args, **kw)
return wrap
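# --- Illustrative usage sketch (not part of the original file) ---
# DO_TIMING is False by default, so decorated calls run untimed; flipping the
# module-level flag makes the wrapper print per-call durations. The decorated
# function below is a made-up example, not part of TrackEval.
if __name__ == '__main__':
    DO_TIMING = True                     # rebind the module-level switch

    @time
    def count_up_to(n):
        total = 0
        for _ in range(n):
            total += 1
        return total

    print(count_up_to(1_000_000))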

View File

@@ -0,0 +1,6 @@
import baseline_utils
import stp
import non_overlap
import pascal_colormap
import thresholder
import vizualize

View File

@@ -0,0 +1,321 @@
import os
import csv
import numpy as np
from copy import deepcopy
from PIL import Image
from pycocotools import mask as mask_utils
from scipy.optimize import linear_sum_assignment
from trackeval.baselines.pascal_colormap import pascal_colormap
def load_seq(file_to_load):
""" Load input data from file in RobMOTS format (e.g. provided detections).
Returns: Data object with the following structure (see STP):
data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
"""
fp = open(file_to_load)
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
dialect.skipinitialspace = True
fp.seek(0)
reader = csv.reader(fp, dialect)
read_data = {}
num_timesteps = 0
for i, row in enumerate(reader):
if row[-1] == '':  # drop a trailing empty field
row = row[:-1]
t = int(row[0])
cid = row[1]
c = int(row[2])
s = row[3]
h = row[4]
w = row[5]
rle = row[6]
if t >= num_timesteps:
num_timesteps = t + 1
if c in read_data.keys():
if t in read_data[c].keys():
read_data[c][t]['ids'].append(cid)
read_data[c][t]['scores'].append(s)
read_data[c][t]['im_hs'].append(h)
read_data[c][t]['im_ws'].append(w)
read_data[c][t]['mask_rles'].append(rle)
else:
read_data[c][t] = {}
read_data[c][t]['ids'] = [cid]
read_data[c][t]['scores'] = [s]
read_data[c][t]['im_hs'] = [h]
read_data[c][t]['im_ws'] = [w]
read_data[c][t]['mask_rles'] = [rle]
else:
read_data[c] = {t: {}}
read_data[c][t]['ids'] = [cid]
read_data[c][t]['scores'] = [s]
read_data[c][t]['im_hs'] = [h]
read_data[c][t]['im_ws'] = [w]
read_data[c][t]['mask_rles'] = [rle]
fp.close()
data = {}
for c in read_data.keys():
data[c] = [{} for _ in range(num_timesteps)]
for t in range(num_timesteps):
if t in read_data[c].keys():
data[c][t]['ids'] = np.atleast_1d(read_data[c][t]['ids']).astype(int)
data[c][t]['scores'] = np.atleast_1d(read_data[c][t]['scores']).astype(float)
data[c][t]['im_hs'] = np.atleast_1d(read_data[c][t]['im_hs']).astype(int)
data[c][t]['im_ws'] = np.atleast_1d(read_data[c][t]['im_ws']).astype(int)
data[c][t]['mask_rles'] = np.atleast_1d(read_data[c][t]['mask_rles']).astype(str)
else:
data[c][t]['ids'] = np.empty(0).astype(int)
data[c][t]['scores'] = np.empty(0).astype(float)
data[c][t]['im_hs'] = np.empty(0).astype(int)
data[c][t]['im_ws'] = np.empty(0).astype(int)
data[c][t]['mask_rles'] = np.empty(0).astype(str)
return data
def threshold(tdata, thresh):
""" Removes detections below a certian threshold ('thresh') score. """
new_data = {}
to_keep = tdata['scores'] > thresh
for field in ['ids', 'scores', 'im_hs', 'im_ws', 'mask_rles']:
new_data[field] = tdata[field][to_keep]
return new_data
def create_coco_mask(mask_rles, im_hs, im_ws):
""" Converts mask as rle text (+ height and width) to encoded version used by pycocotools. """
coco_masks = [{'size': [h, w], 'counts': m.encode(encoding='UTF-8')}
for h, w, m in zip(im_hs, im_ws, mask_rles)]
return coco_masks
def mask_iou(mask_rles1, mask_rles2, im_hs, im_ws, do_ioa=0):
""" Calculate mask IoU between two masks.
Further allows 'intersection over area' instead of IoU (over the area of mask_rle1).
Allows either to pass in 1 boolean for do_ioa for all mask_rles2 or also one for each mask_rles2.
It is recommended that mask_rles1 is a detection and mask_rles2 is a groundtruth.
"""
coco_masks1 = create_coco_mask(mask_rles1, im_hs, im_ws)
coco_masks2 = create_coco_mask(mask_rles2, im_hs, im_ws)
if not hasattr(do_ioa, "__len__"):
do_ioa = [do_ioa]*len(coco_masks2)
assert(len(coco_masks2) == len(do_ioa))
if len(coco_masks1) == 0 or len(coco_masks2) == 0:
iou = np.zeros((len(coco_masks1), len(coco_masks2)))
else:
iou = mask_utils.iou(coco_masks1, coco_masks2, do_ioa)
return iou
def sort_by_score(t_data):
""" Sorts data by score """
sort_index = np.argsort(t_data['scores'])[::-1]
for k in t_data.keys():
t_data[k] = t_data[k][sort_index]
return t_data
def mask_NMS(t_data, nms_threshold=0.5, already_sorted=False):
""" Remove redundant masks by performing non-maximum suppression (NMS) """
# Sort by score
if not already_sorted:
t_data = sort_by_score(t_data)
# Calculate the mask IoU between all detections in the timestep.
mask_ious_all = mask_iou(t_data['mask_rles'], t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Determine which masks NMS should remove
# (those overlapping greater than nms_threshold with another mask that has a higher score)
num_dets = len(t_data['mask_rles'])
to_remove = [False for _ in range(num_dets)]
for i in range(num_dets):
if not to_remove[i]:
for j in range(i + 1, num_dets):
if mask_ious_all[i, j] > nms_threshold:
to_remove[j] = True
# Remove detections which should be removed
to_keep = np.logical_not(to_remove)
for k in t_data.keys():
t_data[k] = t_data[k][to_keep]
return t_data
def non_overlap(t_data, already_sorted=False):
""" Enforces masks to be non-overlapping in an image, does this by putting masks 'on top of one another',
such that higher score masks 'occlude' and thus remove parts of lower scoring masks.
Help wanted: if anyone knows a way to do this WITHOUT converting the RLE to the np.array let me know, because that
would be MUCH more efficient. (I have tried, but haven't yet had success).
"""
# Sort by score
if not already_sorted:
t_data = sort_by_score(t_data)
# Get coco masks
coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Create a single np.array to hold all of the non-overlapping mask
masks_array = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]), 'uint8')
# Decode each mask into a np.array, and place it into the overall array for the whole frame.
# Since masks with the lowest score are placed first, they are 'partially overridden' by masks with a higher score
# if they overlap.
for i, mask in enumerate(coco_masks[::-1]):
masks_array[mask_utils.decode(mask).astype('bool')] = i + 1
# Encode the resulting np.array back into a set of coco_masks which are now non-overlapping.
num_dets = len(coco_masks)
for i, j in enumerate(range(1, num_dets + 1)[::-1]):
coco_masks[i] = mask_utils.encode(np.asfortranarray(masks_array == j, dtype=np.uint8))
# Convert from coco_mask back into our mask_rle format.
t_data['mask_rles'] = [m['counts'].decode("utf-8") for m in coco_masks]
return t_data
def masks2boxes(mask_rles, im_hs, im_ws):
""" Extracts bounding boxes which surround a set of masks. """
coco_masks = create_coco_mask(mask_rles, im_hs, im_ws)
boxes = np.array([mask_utils.toBbox(x) for x in coco_masks])
if len(boxes) == 0:
boxes = np.empty((0, 4))
return boxes
def box_iou(bboxes1, bboxes2, box_format='xywh', do_ioa=False, do_giou=False):
""" Calculates the IOU (intersection over union) between two arrays of boxes.
Allows variable box formats ('xywh' and 'x0y0x1y1').
If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
used to determine if detections are within crowd ignore region.
If do_giou (generalized intersection over union), then calculates GIoU.
"""
if len(bboxes1) == 0 or len(bboxes2) == 0:
ious = np.zeros((len(bboxes1), len(bboxes2)))
return ious
if box_format == 'xywh':
# layout: (x0, y0, w, h)
bboxes1 = deepcopy(bboxes1)
bboxes2 = deepcopy(bboxes2)
bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
elif box_format != 'x0y0x1y1':
raise (Exception('box_format %s is not implemented' % box_format))
# layout: (x0, y0, x1, y1)
min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
if do_ioa:
ioas = np.zeros_like(intersection)
valid_mask = area1 > 0 + np.finfo('float').eps
ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
return ioas
else:
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
intersection[union <= 0 + np.finfo('float').eps] = 0
union[union <= 0 + np.finfo('float').eps] = 1
ious = intersection / union
if do_giou:
enclosing_area = np.maximum(max_[..., 2] - min_[..., 0], 0) * np.maximum(max_[..., 3] - min_[..., 1], 0)
eps = 1e-7
# giou
ious = ious - ((enclosing_area - union) / (enclosing_area + eps))
return ious
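A quick numeric sketch (editorial addition, not part of the original file) of the conventions above, again assuming the module imports as trackeval.baselines.baseline_utils; the boxes are made up:

import numpy as np
from trackeval.baselines import baseline_utils as butils

boxes_a = np.array([[0., 0., 10., 10.]])                         # one 10x10 box at the origin ('xywh')
boxes_b = np.array([[0., 0., 10., 10.], [20., 20., 10., 10.]])   # an identical box and a disjoint one
print(butils.box_iou(boxes_a, boxes_b))                 # [[1. 0.]]
print(butils.box_iou(boxes_a, boxes_b, do_ioa=True))    # [[1. 0.]]  intersection over the area of boxes_a
print(butils.box_iou(boxes_a, boxes_b, do_giou=True))   # approx [[1. -0.78]]  GIoU penalises the empty enclosing area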
def match(match_scores):
match_rows, match_cols = linear_sum_assignment(-match_scores)
return match_rows, match_cols
def write_seq(output_data, out_file):
out_loc = os.path.dirname(out_file)
if not os.path.exists(out_loc):
os.makedirs(out_loc, exist_ok=True)
fp = open(out_file, 'w', newline='')
writer = csv.writer(fp, delimiter=' ')
for row in output_data:
writer.writerow(row)
fp.close()
def combine_classes(data):
""" Converts data from a class-separated to a class-combined format.
Input format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
Output format: data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
"""
output_data = [{} for _ in list(data.values())[0]]
for cls, cls_data in data.items():
for timestep, t_data in enumerate(cls_data):
for k in t_data.keys():
if k in output_data[timestep].keys():
output_data[timestep][k] += list(t_data[k])
else:
output_data[timestep][k] = list(t_data[k])
if 'cls' in output_data[timestep].keys():
output_data[timestep]['cls'] += [cls]*len(t_data['ids'])
else:
output_data[timestep]['cls'] = [cls]*len(t_data['ids'])
for timestep, t_data in enumerate(output_data):
for k in t_data.keys():
output_data[timestep][k] = np.array(output_data[timestep][k])
return output_data
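A toy illustration (editorial addition, not part of the original file) of the conversion above; the RLE strings are placeholders and only the list bookkeeping matters here:

import numpy as np
from trackeval.baselines import baseline_utils as butils

data = {1: [{'ids': np.array([0]), 'scores': np.array([0.9]), 'im_hs': np.array([480]), 'im_ws': np.array([640]), 'mask_rles': ['rle_a']}],
        2: [{'ids': np.array([1]), 'scores': np.array([0.8]), 'im_hs': np.array([480]), 'im_ws': np.array([640]), 'mask_rles': ['rle_b']}]}
combined = butils.combine_classes(data)
# combined[0]['ids'] -> array([0, 1]); combined[0]['cls'] pairs each of these detections with its class id.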
def save_as_png(t_data, out_file, im_h, im_w):
""" Save a set of segmentation masks into a PNG format, the same as used for the DAVIS dataset."""
if len(t_data['mask_rles']) > 0:
coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
list_of_np_masks = [mask_utils.decode(mask) for mask in coco_masks]
png = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]))
for mask, c_id in zip(list_of_np_masks, t_data['ids']):
png[mask.astype("bool")] = c_id + 1
else:
png = np.zeros((im_h, im_w))
if not os.path.exists(os.path.dirname(out_file)):
os.makedirs(os.path.dirname(out_file))
colmap = (np.array(pascal_colormap) * 255).round().astype("uint8")
palimage = Image.new('P', (16, 16))
palimage.putpalette(colmap)
im = Image.fromarray(np.squeeze(png.astype("uint8")))
im2 = im.quantize(palette=palimage)
im2.save(out_file)
def get_frame_size(data):
""" Gets frame height and width from data. """
for cls, cls_data in data.items():
for timestep, t_data in enumerate(cls_data):
if len(t_data['im_hs']) > 0:
im_h = t_data['im_hs'][0]
im_w = t_data['im_ws'][0]
return im_h, im_w
return None

View File

@@ -0,0 +1,92 @@
"""
Non-Overlap: Code to take in a set of raw detections and produce a set of non-overlapping detections from it.
Author: Jonathon Luiten
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/raw_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'THRESHOLD_NMS_MASK_IOU': 0.5,
}
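The script is driven entirely by this module-level config dict; a hand edit for a smaller, parallel run might look like the following (the values here are only an illustration):

config['SPLIT'] = 'val'                  # process the val split instead of train
config['Benchmarks'] = ['kitti_mots']    # restrict the run to a single benchmark
config['Num_Parallel_Cores'] = 8         # use a multiprocessing Pool with 8 workers
config['THRESHOLD_NMS_MASK_IOU'] = 0.7   # keep more overlapping masks before the non-overlap step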
def do_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Converts data from a class-separated to a class-combined format.
# data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
data = butils.combine_classes(data)
# Where to accumulate output data for writing out
output_data = []
# Run for each timestep.
for timestep, t_data in enumerate(data):
# Remove redundant masks by performing non-maximum suppression (NMS)
t_data = butils.mask_NMS(t_data, nms_threshold=config['THRESHOLD_NMS_MASK_IOU'])
# Perform non-overlap, to get non_overlapping masks.
t_data = butils.non_overlap(t_data, already_sorted=True)
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(t_data['ids'][i]), t_data['cls'][i], t_data['scores'][i], t_data['im_hs'][i],
t_data['im_ws'][i], t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,257 @@
pascal_colormap = [
0 , 0, 0,
0.5020, 0, 0,
0, 0.5020, 0,
0.5020, 0.5020, 0,
0, 0, 0.5020,
0.5020, 0, 0.5020,
0, 0.5020, 0.5020,
0.5020, 0.5020, 0.5020,
0.2510, 0, 0,
0.7529, 0, 0,
0.2510, 0.5020, 0,
0.7529, 0.5020, 0,
0.2510, 0, 0.5020,
0.7529, 0, 0.5020,
0.2510, 0.5020, 0.5020,
0.7529, 0.5020, 0.5020,
0, 0.2510, 0,
0.5020, 0.2510, 0,
0, 0.7529, 0,
0.5020, 0.7529, 0,
0, 0.2510, 0.5020,
0.5020, 0.2510, 0.5020,
0, 0.7529, 0.5020,
0.5020, 0.7529, 0.5020,
0.2510, 0.2510, 0,
0.7529, 0.2510, 0,
0.2510, 0.7529, 0,
0.7529, 0.7529, 0,
0.2510, 0.2510, 0.5020,
0.7529, 0.2510, 0.5020,
0.2510, 0.7529, 0.5020,
0.7529, 0.7529, 0.5020,
0, 0, 0.2510,
0.5020, 0, 0.2510,
0, 0.5020, 0.2510,
0.5020, 0.5020, 0.2510,
0, 0, 0.7529,
0.5020, 0, 0.7529,
0, 0.5020, 0.7529,
0.5020, 0.5020, 0.7529,
0.2510, 0, 0.2510,
0.7529, 0, 0.2510,
0.2510, 0.5020, 0.2510,
0.7529, 0.5020, 0.2510,
0.2510, 0, 0.7529,
0.7529, 0, 0.7529,
0.2510, 0.5020, 0.7529,
0.7529, 0.5020, 0.7529,
0, 0.2510, 0.2510,
0.5020, 0.2510, 0.2510,
0, 0.7529, 0.2510,
0.5020, 0.7529, 0.2510,
0, 0.2510, 0.7529,
0.5020, 0.2510, 0.7529,
0, 0.7529, 0.7529,
0.5020, 0.7529, 0.7529,
0.2510, 0.2510, 0.2510,
0.7529, 0.2510, 0.2510,
0.2510, 0.7529, 0.2510,
0.7529, 0.7529, 0.2510,
0.2510, 0.2510, 0.7529,
0.7529, 0.2510, 0.7529,
0.2510, 0.7529, 0.7529,
0.7529, 0.7529, 0.7529,
0.1255, 0, 0,
0.6275, 0, 0,
0.1255, 0.5020, 0,
0.6275, 0.5020, 0,
0.1255, 0, 0.5020,
0.6275, 0, 0.5020,
0.1255, 0.5020, 0.5020,
0.6275, 0.5020, 0.5020,
0.3765, 0, 0,
0.8784, 0, 0,
0.3765, 0.5020, 0,
0.8784, 0.5020, 0,
0.3765, 0, 0.5020,
0.8784, 0, 0.5020,
0.3765, 0.5020, 0.5020,
0.8784, 0.5020, 0.5020,
0.1255, 0.2510, 0,
0.6275, 0.2510, 0,
0.1255, 0.7529, 0,
0.6275, 0.7529, 0,
0.1255, 0.2510, 0.5020,
0.6275, 0.2510, 0.5020,
0.1255, 0.7529, 0.5020,
0.6275, 0.7529, 0.5020,
0.3765, 0.2510, 0,
0.8784, 0.2510, 0,
0.3765, 0.7529, 0,
0.8784, 0.7529, 0,
0.3765, 0.2510, 0.5020,
0.8784, 0.2510, 0.5020,
0.3765, 0.7529, 0.5020,
0.8784, 0.7529, 0.5020,
0.1255, 0, 0.2510,
0.6275, 0, 0.2510,
0.1255, 0.5020, 0.2510,
0.6275, 0.5020, 0.2510,
0.1255, 0, 0.7529,
0.6275, 0, 0.7529,
0.1255, 0.5020, 0.7529,
0.6275, 0.5020, 0.7529,
0.3765, 0, 0.2510,
0.8784, 0, 0.2510,
0.3765, 0.5020, 0.2510,
0.8784, 0.5020, 0.2510,
0.3765, 0, 0.7529,
0.8784, 0, 0.7529,
0.3765, 0.5020, 0.7529,
0.8784, 0.5020, 0.7529,
0.1255, 0.2510, 0.2510,
0.6275, 0.2510, 0.2510,
0.1255, 0.7529, 0.2510,
0.6275, 0.7529, 0.2510,
0.1255, 0.2510, 0.7529,
0.6275, 0.2510, 0.7529,
0.1255, 0.7529, 0.7529,
0.6275, 0.7529, 0.7529,
0.3765, 0.2510, 0.2510,
0.8784, 0.2510, 0.2510,
0.3765, 0.7529, 0.2510,
0.8784, 0.7529, 0.2510,
0.3765, 0.2510, 0.7529,
0.8784, 0.2510, 0.7529,
0.3765, 0.7529, 0.7529,
0.8784, 0.7529, 0.7529,
0, 0.1255, 0,
0.5020, 0.1255, 0,
0, 0.6275, 0,
0.5020, 0.6275, 0,
0, 0.1255, 0.5020,
0.5020, 0.1255, 0.5020,
0, 0.6275, 0.5020,
0.5020, 0.6275, 0.5020,
0.2510, 0.1255, 0,
0.7529, 0.1255, 0,
0.2510, 0.6275, 0,
0.7529, 0.6275, 0,
0.2510, 0.1255, 0.5020,
0.7529, 0.1255, 0.5020,
0.2510, 0.6275, 0.5020,
0.7529, 0.6275, 0.5020,
0, 0.3765, 0,
0.5020, 0.3765, 0,
0, 0.8784, 0,
0.5020, 0.8784, 0,
0, 0.3765, 0.5020,
0.5020, 0.3765, 0.5020,
0, 0.8784, 0.5020,
0.5020, 0.8784, 0.5020,
0.2510, 0.3765, 0,
0.7529, 0.3765, 0,
0.2510, 0.8784, 0,
0.7529, 0.8784, 0,
0.2510, 0.3765, 0.5020,
0.7529, 0.3765, 0.5020,
0.2510, 0.8784, 0.5020,
0.7529, 0.8784, 0.5020,
0, 0.1255, 0.2510,
0.5020, 0.1255, 0.2510,
0, 0.6275, 0.2510,
0.5020, 0.6275, 0.2510,
0, 0.1255, 0.7529,
0.5020, 0.1255, 0.7529,
0, 0.6275, 0.7529,
0.5020, 0.6275, 0.7529,
0.2510, 0.1255, 0.2510,
0.7529, 0.1255, 0.2510,
0.2510, 0.6275, 0.2510,
0.7529, 0.6275, 0.2510,
0.2510, 0.1255, 0.7529,
0.7529, 0.1255, 0.7529,
0.2510, 0.6275, 0.7529,
0.7529, 0.6275, 0.7529,
0, 0.3765, 0.2510,
0.5020, 0.3765, 0.2510,
0, 0.8784, 0.2510,
0.5020, 0.8784, 0.2510,
0, 0.3765, 0.7529,
0.5020, 0.3765, 0.7529,
0, 0.8784, 0.7529,
0.5020, 0.8784, 0.7529,
0.2510, 0.3765, 0.2510,
0.7529, 0.3765, 0.2510,
0.2510, 0.8784, 0.2510,
0.7529, 0.8784, 0.2510,
0.2510, 0.3765, 0.7529,
0.7529, 0.3765, 0.7529,
0.2510, 0.8784, 0.7529,
0.7529, 0.8784, 0.7529,
0.1255, 0.1255, 0,
0.6275, 0.1255, 0,
0.1255, 0.6275, 0,
0.6275, 0.6275, 0,
0.1255, 0.1255, 0.5020,
0.6275, 0.1255, 0.5020,
0.1255, 0.6275, 0.5020,
0.6275, 0.6275, 0.5020,
0.3765, 0.1255, 0,
0.8784, 0.1255, 0,
0.3765, 0.6275, 0,
0.8784, 0.6275, 0,
0.3765, 0.1255, 0.5020,
0.8784, 0.1255, 0.5020,
0.3765, 0.6275, 0.5020,
0.8784, 0.6275, 0.5020,
0.1255, 0.3765, 0,
0.6275, 0.3765, 0,
0.1255, 0.8784, 0,
0.6275, 0.8784, 0,
0.1255, 0.3765, 0.5020,
0.6275, 0.3765, 0.5020,
0.1255, 0.8784, 0.5020,
0.6275, 0.8784, 0.5020,
0.3765, 0.3765, 0,
0.8784, 0.3765, 0,
0.3765, 0.8784, 0,
0.8784, 0.8784, 0,
0.3765, 0.3765, 0.5020,
0.8784, 0.3765, 0.5020,
0.3765, 0.8784, 0.5020,
0.8784, 0.8784, 0.5020,
0.1255, 0.1255, 0.2510,
0.6275, 0.1255, 0.2510,
0.1255, 0.6275, 0.2510,
0.6275, 0.6275, 0.2510,
0.1255, 0.1255, 0.7529,
0.6275, 0.1255, 0.7529,
0.1255, 0.6275, 0.7529,
0.6275, 0.6275, 0.7529,
0.3765, 0.1255, 0.2510,
0.8784, 0.1255, 0.2510,
0.3765, 0.6275, 0.2510,
0.8784, 0.6275, 0.2510,
0.3765, 0.1255, 0.7529,
0.8784, 0.1255, 0.7529,
0.3765, 0.6275, 0.7529,
0.8784, 0.6275, 0.7529,
0.1255, 0.3765, 0.2510,
0.6275, 0.3765, 0.2510,
0.1255, 0.8784, 0.2510,
0.6275, 0.8784, 0.2510,
0.1255, 0.3765, 0.7529,
0.6275, 0.3765, 0.7529,
0.1255, 0.8784, 0.7529,
0.6275, 0.8784, 0.7529,
0.3765, 0.3765, 0.2510,
0.8784, 0.3765, 0.2510,
0.3765, 0.8784, 0.2510,
0.8784, 0.8784, 0.2510,
0.3765, 0.3765, 0.7529,
0.8784, 0.3765, 0.7529,
0.3765, 0.8784, 0.7529,
0.8784, 0.8784, 0.7529]

View File

@@ -0,0 +1,144 @@
"""
STP: Simplest Tracker Possible
Author: Jonathon Luiten
This simple tracker assigns track IDs which maximise the 'bounding box IoU' between previous tracks and current
detections. It is also able to match detections to tracks at more than one timestep previously.
"""
import os
import sys
import numpy as np
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'DETECTION_THRESHOLD': 0.5,
'ASSOCIATION_THRESHOLD': 1e-10,
'MAX_FRAMES_SKIP': 7
}
def track_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Where to accumulate output data for writing out
output_data = []
# To ensure IDs are unique per object across all classes.
curr_max_id = 0
# Run tracker for each class.
for cls, cls_data in data.items():
# Initialize container for holding previously tracked objects.
prev = {'boxes': np.empty((0, 4)),
'ids': np.array([], int),
'timesteps': np.array([])}
# Run tracker for each timestep.
for timestep, t_data in enumerate(cls_data):
# Threshold detections.
t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])
# Convert mask dets to bounding boxes.
boxes = butils.masks2boxes(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Calculate IoU between previous and current frame dets.
ious = butils.box_iou(prev['boxes'], boxes)
# Score which decreases quickly for previous dets depending on how many timesteps ago they were last matched.
prev_timestep_scores = np.power(10, -1 * prev['timesteps'])
# Matching score is such that it first tries to match 'most recent timesteps',
# and within each timestep maximises IoU.
match_scores = prev_timestep_scores[:, np.newaxis] * ious
# Find best matching between current dets and previous tracks.
match_rows, match_cols = butils.match(match_scores)
# Remove matches that have an IoU below a certain threshold.
actually_matched_mask = ious[match_rows, match_cols] > config['ASSOCIATION_THRESHOLD']
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
# Assign the prev track ID to the current dets if they were matched.
ids = np.nan * np.ones((len(boxes),), int)
ids[match_cols] = prev['ids'][match_rows]
# Create new track IDs for dets that were not matched to previous tracks.
num_not_matched = len(ids) - len(match_cols)
new_ids = np.arange(curr_max_id + 1, curr_max_id + num_not_matched + 1)
ids[np.isnan(ids)] = new_ids
# Update maximum ID to ensure future added tracks have a unique ID value.
curr_max_id += num_not_matched
# Drop tracks from 'previous tracks' if they have not been matched in the last MAX_FRAMES_SKIP frames.
unmatched_rows = [i for i in range(len(prev['ids'])) if
i not in match_rows and (prev['timesteps'][i] + 1 <= config['MAX_FRAMES_SKIP'])]
# Update the set of previous tracking results to include the newly tracked detections.
prev['ids'] = np.concatenate((ids, prev['ids'][unmatched_rows]), axis=0)
prev['boxes'] = np.concatenate((np.atleast_2d(boxes), np.atleast_2d(prev['boxes'][unmatched_rows])), axis=0)
prev['timesteps'] = np.concatenate((np.zeros((len(ids),)), prev['timesteps'][unmatched_rows] + 1), axis=0)
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(ids[i]), cls, t_data['scores'][i], t_data['im_hs'][i], t_data['im_ws'][i],
t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(track_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
track_sequence(seq_todo)
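A small numeric sketch (editorial addition, not part of the original file) of how the per-timestep weighting in track_sequence makes the assignment prefer recently seen tracks; the IoU values are made up and trackeval is assumed importable:

import numpy as np
from trackeval.baselines import baseline_utils as butils

ious = np.array([[0.3, 0.0], [0.9, 0.8]])   # rows: previous tracks A and B; cols: the two current dets
prev_timesteps = np.array([0., 1.])          # A was matched in the previous frame, B two frames ago
match_scores = np.power(10, -1 * prev_timesteps)[:, np.newaxis] * ious   # [[0.3, 0.0], [0.09, 0.08]]
rows, cols = butils.match(match_scores)
# rows -> [0 1], cols -> [0 1]: det 0 goes to the recent track A (0.3 beats 0.09) and det 1 to track B.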

View File

@@ -0,0 +1,92 @@
"""
Thresholder
Author: Jonathon Luiten
Simply reads in a set of detections, thresholds them at a certain score threshold, and writes them out again.
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
THRESHOLD = 0.2
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/threshold_' + str(100*THRESHOLD) + '/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'DETECTION_THRESHOLD': THRESHOLD,
}
def do_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Where to accumulate output data for writing out
output_data = []
# Run for each class.
for cls, cls_data in data.items():
# Run for each timestep.
for timestep, t_data in enumerate(cls_data):
# Threshold detections.
t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(t_data['ids'][i]), cls, t_data['scores'][i], t_data['im_hs'][i],
t_data['im_ws'][i], t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,94 @@
"""
Visualize: Code which converts .txt RLE tracking results into a visual .png format.
Author: Jonathon Luiten
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
from trackeval.datasets.rob_mots_classmap import cls_id_to_name
code_path = get_code_path()
config = {
# Tracker format:
'INPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/{bench}'),
'OUTPUT_FOL': os.path.join(code_path, 'data/viz/rob_mots/{split}/STP/data/{bench}'),
# GT format:
# 'INPUT_FOL': os.path.join(code_path, 'data/gt/rob_mots/{split}/{bench}/data/'),
# 'OUTPUT_FOL': os.path.join(code_path, 'data/gt_viz/rob_mots/{split}/{bench}/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
}
def do_sequence(seq_file):
# Folder to save resulting visualization in
out_fol = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench),
config['OUTPUT_FOL'].format(split=config['SPLIT'], bench=bench)).replace('.txt', '')
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Get frame size for visualizing empty frames
im_h, im_w = butils.get_frame_size(data)
# First run for each class.
for cls, cls_data in data.items():
if cls >= 100:
continue
# Run for each timestep.
for timestep, t_data in enumerate(cls_data):
# Save out visualization
out_file = os.path.join(out_fol, cls_id_to_name[cls], str(timestep).zfill(5) + '.png')
butils.save_as_png(t_data, out_file, im_h, im_w)
# Then run for all classes combined
# Converts data from a class-separated to a class-combined format.
data = butils.combine_classes(data)
# Run for each timestep.
for timestep, t_data in enumerate(data):
# Save out visualization
out_file = os.path.join(out_fol, 'all_classes', str(timestep).zfill(5) + '.png')
butils.save_as_png(t_data, out_file, im_h, im_w)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,15 @@
from .kitti_2d_box import Kitti2DBox
from .kitti_mots import KittiMOTS
from .mot_challenge_2d_box import MotChallenge2DBox
from .mots_challenge import MOTSChallenge
from .bdd100k import BDD100K
from .davis import DAVIS
from .tao import TAO
from .tao_ow import TAO_OW
from .burst import BURST
from .burst_ow import BURST_OW
from .youtube_vis import YouTubeVIS
from .head_tracking_challenge import HeadTrackingChallenge
from .rob_mots import RobMOTS
from .person_path_22 import PersonPath22
from .visdrone import VisDrone2DBox

View File

@@ -0,0 +1,326 @@
import csv
import io
import zipfile
import os
import traceback
import numpy as np
from copy import deepcopy
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException
class _BaseDataset(ABC):
@abstractmethod
def __init__(self):
self.tracker_list = None
self.seq_list = None
self.class_list = None
self.output_fol = None
self.output_sub_fol = None
self.should_classes_combine = True
self.use_super_categories = False
# Functions to implement:
@staticmethod
@abstractmethod
def get_default_dataset_config():
...
@abstractmethod
def _load_raw_file(self, tracker, seq, is_gt):
...
@_timing.time
@abstractmethod
def get_preprocessed_seq_data(self, raw_data, cls):
...
@abstractmethod
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
...
# Helper functions for all datasets:
@classmethod
def get_class_name(cls):
return cls.__name__
def get_name(self):
return self.get_class_name()
def get_output_fol(self, tracker):
return os.path.join(self.output_fol, tracker, self.output_sub_fol)
def get_display_name(self, tracker):
""" Can be overwritten if the trackers name (in files) is different to how it should be displayed.
By default this method just returns the trackers name as is.
"""
return tracker
def get_eval_info(self):
"""Return info about the dataset needed for the Evaluator"""
return self.tracker_list, self.seq_list, self.class_list
@_timing.time
def get_raw_seq_data(self, tracker, seq):
""" Loads raw data (tracker and ground-truth) for a single tracker on a single sequence.
Raw data includes all of the information needed for both preprocessing and evaluation, for all classes.
A later function (get_processed_seq_data) will perform such preprocessing and extract relevant information for
the evaluation of each class.
This returns a dict which contains the fields:
[num_timesteps]: integer
[gt_ids, tracker_ids, gt_classes, tracker_classes, tracker_confidences]:
list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
[gt_extras]: dict (for each extra) of lists (for each timestep) of 1D NDArrays (for each det).
gt_extras contains dataset specific information used for preprocessing such as occlusion and truncation levels.
Note that similarities are extracted as part of the dataset and not the metric, because almost all metrics are
independent of the exact method of calculating the similarity. However datasets are not (e.g. segmentation
masks vs 2D boxes vs 3D boxes).
We calculate the similarity before preprocessing because often both preprocessing and evaluation require it and
we don't wish to calculate this twice.
We calculate similarity between all gt and tracker classes (not just each class individually) to allow for
calculation of metrics such as class confusion matrices. Typically the impact of this on performance is low.
"""
# Load raw data.
raw_gt_data = self._load_raw_file(tracker, seq, is_gt=True)
raw_tracker_data = self._load_raw_file(tracker, seq, is_gt=False)
raw_data = {**raw_tracker_data, **raw_gt_data} # Merges dictionaries
# Calculate similarities for each timestep.
similarity_scores = []
for t, (gt_dets_t, tracker_dets_t) in enumerate(zip(raw_data['gt_dets'], raw_data['tracker_dets'])):
ious = self._calculate_similarities(gt_dets_t, tracker_dets_t)
similarity_scores.append(ious)
raw_data['similarity_scores'] = similarity_scores
return raw_data
@staticmethod
def _load_simple_text_file(file, time_col=0, id_col=None, remove_negative_ids=False, valid_filter=None,
crowd_ignore_filter=None, convert_filter=None, is_zipped=False, zip_file=None,
force_delimiters=None):
""" Function that loads data which is in a commonly used text file format.
Assumes each det is given by one row of a text file.
There is no limit to the number or meaning of each column,
however one column needs to give the timestep of each det (time_col), which defaults to column 0.
The file dialect (delimiter, number of columns, etc.) is determined automatically.
This function automatically separates dets by timestep,
and is much faster than alternatives such as np.loadtxt or pandas.
If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
These are not excluded from ignore data.
valid_filter can be used to only include certain classes.
It is a dict with ints as keys, and lists as values,
such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
If None, all classes are included.
crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid filter.
convert_filter can be used to convert value read to another format.
This is used most commonly to convert classes given as string to a class id.
This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.
Optionally, input files could be a zip of multiple text files for storage efficiency.
Returns read_data and ignore_data.
Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
Note that all data is returned as strings, and must be converted to float/int later if needed.
Note that timesteps will not be present in the returned dict keys if there are no dets for them
"""
if remove_negative_ids and id_col is None:
raise TrackEvalException('remove_negative_ids is True, but id_col is not given.')
if crowd_ignore_filter is None:
crowd_ignore_filter = {}
if convert_filter is None:
convert_filter = {}
try:
if is_zipped: # Either open file directly or within a zip.
if zip_file is None:
raise TrackEvalException('is_zipped set to True, but no zip_file is given.')
archive = zipfile.ZipFile(os.path.join(zip_file), 'r')
fp = io.TextIOWrapper(archive.open(file, 'r'))
else:
fp = open(file)
read_data = {}
crowd_ignore_data = {}
fp.seek(0, os.SEEK_END)
# check if file is empty
if fp.tell():
fp.seek(0)
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=force_delimiters) # Auto determine structure.
dialect.skipinitialspace = True # Deal with extra spaces between columns
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
try:
# Deal with extra trailing spaces at the end of rows
if row[-1] == '':
row = row[:-1]
timestep = str(int(float(row[time_col])))
# Read ignore regions separately.
is_ignored = False
for ignore_key, ignore_value in crowd_ignore_filter.items():
if row[ignore_key].lower() in ignore_value:
# Convert values in one column (e.g. string to id)
for convert_key, convert_value in convert_filter.items():
row[convert_key] = convert_value[row[convert_key].lower()]
# Save data separated by timestep.
if timestep in crowd_ignore_data.keys():
crowd_ignore_data[timestep].append(row)
else:
crowd_ignore_data[timestep] = [row]
is_ignored = True
if is_ignored: # if det is an ignore region, it cannot be a normal det.
continue
# Exclude some dets if not valid.
if valid_filter is not None:
for key, value in valid_filter.items():
if row[key].lower() not in value:
continue
if remove_negative_ids:
if int(float(row[id_col])) < 0:
continue
# Convert values in one column (e.g. string to id)
for convert_key, convert_value in convert_filter.items():
row[convert_key] = convert_value[row[convert_key].lower()]
# Save data separated by timestep.
if timestep in read_data.keys():
read_data[timestep].append(row)
else:
read_data[timestep] = [row]
except Exception:
exc_str_init = 'In file %s the following line cannot be read correctly: \n' % os.path.basename(
file)
exc_str = ' '.join([exc_str_init]+row)
raise TrackEvalException(exc_str)
fp.close()
except Exception:
print('Error loading file: %s, printing traceback.' % file)
traceback.print_exc()
raise TrackEvalException(
'File %s cannot be read because it is either not present or invalidly formatted' % os.path.basename(
file))
return read_data, crowd_ignore_data
@staticmethod
def _calculate_mask_ious(masks1, masks2, is_encoded=False, do_ioa=False):
""" Calculates the IOU (intersection over union) between two arrays of segmentation masks.
If is_encoded a run length encoding with pycocotools is assumed as input format, otherwise an input of numpy
arrays of the shape (num_masks, height, width) is assumed and the encoding is performed.
If do_ioa (intersection over area), then calculates the intersection over the area of masks1 - this is commonly
used to determine if detections are within crowd ignore region.
:param masks1: first set of masks (numpy array of shape (num_masks, height, width) if not encoded,
else pycocotools rle encoded format)
:param masks2: second set of masks (numpy array of shape (num_masks, height, width) if not encoded,
else pycocotools rle encoded format)
:param is_encoded: whether the input is in pycocotools rle encoded format
:param do_ioa: whether to perform IoA computation
:return: the IoU/IoA scores
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# use pycocotools for run length encoding of masks
if not is_encoded:
masks1 = mask_utils.encode(np.array(np.transpose(masks1, (1, 2, 0)), order='F'))
masks2 = mask_utils.encode(np.array(np.transpose(masks2, (1, 2, 0)), order='F'))
# use pycocotools for iou computation of rle encoded masks
ious = mask_utils.iou(masks1, masks2, [do_ioa]*len(masks2))
if len(masks1) == 0 or len(masks2) == 0:
ious = np.asarray(ious).reshape(len(masks1), len(masks2))
assert (ious >= 0 - np.finfo('float').eps).all()
assert (ious <= 1 + np.finfo('float').eps).all()
return ious
@staticmethod
def _calculate_box_ious(bboxes1, bboxes2, box_format='xywh', do_ioa=False):
""" Calculates the IOU (intersection over union) between two arrays of boxes.
Allows variable box formats ('xywh' and 'x0y0x1y1').
If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
used to determine if detections are within crowd ignore region.
"""
if box_format in 'xywh':
# layout: (x0, y0, w, h)
bboxes1 = deepcopy(bboxes1)
bboxes2 = deepcopy(bboxes2)
bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
elif box_format not in 'x0y0x1y1':
raise (TrackEvalException('box_format %s is not implemented' % box_format))
# layout: (x0, y0, x1, y1)
min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
if do_ioa:
ioas = np.zeros_like(intersection)
valid_mask = area1 > 0 + np.finfo('float').eps
ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
return ioas
else:
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
intersection[union <= 0 + np.finfo('float').eps] = 0
union[union <= 0 + np.finfo('float').eps] = 1
ious = intersection / union
return ious
@staticmethod
def _calculate_euclidean_similarity(dets1, dets2, zero_distance=2.0):
""" Calculates the euclidean distance between two sets of detections, and then converts this into a similarity
measure with values between 0 and 1 using the following formula: sim = max(0, 1 - dist/zero_distance).
The default zero_distance of 2.0 corresponds to the default used in MOT15_3D, such that a 0.5 similarity
threshold corresponds to a 1m distance threshold for TPs.
"""
dist = np.linalg.norm(dets1[:, np.newaxis]-dets2[np.newaxis, :], axis=2)
sim = np.maximum(0, 1 - dist/zero_distance)
return sim
@staticmethod
def _check_unique_ids(data, after_preproc=False):
"""Check the requirement that the tracker_ids and gt_ids are unique per timestep"""
gt_ids = data['gt_ids']
tracker_ids = data['tracker_ids']
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(gt_ids, tracker_ids)):
if len(tracker_ids_t) > 0:
unique_ids, counts = np.unique(tracker_ids_t, return_counts=True)
if np.max(counts) != 1:
duplicate_ids = unique_ids[counts > 1]
exc_str_init = 'Tracker predicts the same ID more than once in a single timestep ' \
'(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
if after_preproc:
exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
'so ids may not be as in file, and something seems wrong with preproc.'
raise TrackEvalException(exc_str)
if len(gt_ids_t) > 0:
unique_ids, counts = np.unique(gt_ids_t, return_counts=True)
if np.max(counts) != 1:
duplicate_ids = unique_ids[counts > 1]
exc_str_init = 'Ground-truth has the same ID more than once in a single timestep ' \
'(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
if after_preproc:
exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
'so ids may not be as in file, and something seems wrong with preproc.'
raise TrackEvalException(exc_str)
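Two tiny numeric sketches (editorial additions, not part of the original file) for the similarity helpers above; _BaseDataset is abstract, but these helpers are plain staticmethods and can be called on the class directly. The masks and points are made up:

import numpy as np
from trackeval.datasets._base_dataset import _BaseDataset

# Mask IoU / IoA: un-encoded masks are (num_masks, height, width) uint8 arrays.
m1 = np.zeros((1, 4, 4), dtype=np.uint8); m1[0, :, :2] = 1    # left half of a 4x4 frame (8 pixels)
m2 = np.ones((1, 4, 4), dtype=np.uint8)                       # the whole frame (16 pixels)
print(_BaseDataset._calculate_mask_ious(m1, m2))               # [[0.5]]  8 / 16
print(_BaseDataset._calculate_mask_ious(m1, m2, do_ioa=True))  # [[1.0]]  8 / area of m1
# Euclidean similarity: with the default zero_distance of 2.0, a 1m gap gives similarity 0.5.
print(_BaseDataset._calculate_euclidean_similarity(np.array([[0., 0., 0.]]), np.array([[1., 0., 0.]])))  # [[0.5]]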

View File

@@ -0,0 +1,302 @@
import os
import json
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class BDD100K(_BaseDataset):
"""Dataset class for BDD100K tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/bdd100k/bdd100k_val'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/bdd100k/bdd100k_val'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle'],
# Valid: ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
'SPLIT_TO_EVAL': 'val', # Valid: 'training', 'val',
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = True
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [pedestrian, rider, car, '
'bus, truck, train, motorcycle, bicycle] are valid.')
self.super_categories = {"HUMAN": [cls for cls in ["pedestrian", "rider"] if cls in self.class_list],
"VEHICLE": [cls for cls in ["car", "truck", "bus", "train"] if cls in self.class_list],
"BIKE": [cls for cls in ["motorcycle", "bicycle"] if cls in self.class_list]}
self.distractor_classes = ['other person', 'trailer', 'other vehicle']
self.class_name_to_class_id = {'pedestrian': 1, 'rider': 2, 'other person': 3, 'car': 4, 'bus': 5, 'truck': 6,
'train': 7, 'trailer': 8, 'other vehicle': 9, 'motorcycle': 10, 'bicycle': 11}
# Get sequences to eval
self.seq_list = []
self.seq_lengths = {}
self.seq_list = [seq_file.replace('.json', '') for seq_file in os.listdir(self.gt_fol)]
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the BDD100K format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if is_gt:
file = os.path.join(self.gt_fol, seq + '.json')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')
with open(file) as f:
data = json.load(f)
# sort data by frame index
data = sorted(data, key=lambda x: x['index'])
# check sequence length
if is_gt:
self.seq_lengths[seq] = len(data)
num_timesteps = len(data)
else:
num_timesteps = self.seq_lengths[seq]
if num_timesteps != len(data):
raise TrackEvalException('Number of ground truth and tracker timesteps do not match for sequence %s'
% seq)
# Convert data to required format
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
ig_ids = []
keep_ids = []
for i in range(len(data[t]['labels'])):
ann = data[t]['labels'][i]
if is_gt and (ann['category'] in self.distractor_classes or 'attributes' in ann.keys()
and ann['attributes']['Crowd']):
ig_ids.append(i)
else:
keep_ids.append(i)
if keep_ids:
raw_data['dets'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
data[t]['labels'][i]['box2d']['y1'],
data[t]['labels'][i]['box2d']['x2'],
data[t]['labels'][i]['box2d']['y2']
] for i in keep_ids]).astype(float)
raw_data['ids'][t] = np.atleast_1d([data[t]['labels'][i]['id'] for i in keep_ids]).astype(int)
raw_data['classes'][t] = np.atleast_1d([self.class_name_to_class_id[data[t]['labels'][i]['category']]
for i in keep_ids]).astype(int)
else:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if ig_ids:
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
data[t]['labels'][i]['box2d']['y1'],
data[t]['labels'][i]['box2d']['x2'],
data[t]['labels'][i]['box2d']['y2']
] for i in ig_ids]).astype(float)
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4)).astype(float)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
BDD100K:
In BDD100K, the 4 preproc steps are as follow:
1) There are eight classes (pedestrian, rider, car, bus, truck, train, motorcycle, bicycle)
which are evaluated separately.
2) For BDD100K there is no removal of matched tracker dets.
3) Crowd ignore regions are used to remove unmatched detections.
4) No removal of gt dets.
"""
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
box_format='x0y0x1y1', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps,
axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_crowd_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
return similarity_scores
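A hedged usage sketch (editorial addition, not part of the original file): constructing the dataset directly with a partial config. The paths are placeholders and must point at BDD100K-format json folders as described above; unset keys fall back to get_default_dataset_config().

from trackeval.datasets import BDD100K

dataset = BDD100K({'GT_FOLDER': '/path/to/gt/bdd100k/bdd100k_val',
                   'TRACKERS_FOLDER': '/path/to/trackers/bdd100k/bdd100k_val',
                   'CLASSES_TO_EVAL': ['car', 'pedestrian']})
raw = dataset.get_raw_seq_data(dataset.tracker_list[0], dataset.seq_list[0])
car_data = dataset.get_preprocessed_seq_data(raw, 'car')   # preprocessed per-class data ready for metrics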

View File

@@ -0,0 +1,49 @@
import os
from .burst_helpers.burst_base import BURSTBase
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils
class BURST(BURSTBase):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
tao_config = BURSTBase.get_default_dataset_config()
code_path = utils.get_code_path()
# e.g. 'data/gt/tsunami/exemplar_guided/'
tao_config['GT_FOLDER'] = os.path.join(
code_path, 'data/gt/burst/val/') # Location of GT data
# e.g. 'data/trackers/tsunami/exemplar_guided/mask_guided/validation/'
tao_config['TRACKERS_FOLDER'] = os.path.join(
code_path, 'data/trackers/burst/class-guided/') # Trackers location
# set to True or False
tao_config['EXEMPLAR_GUIDED'] = False
return tao_config
def _iou_type(self):
return 'mask'
def _box_or_mask_from_det(self, det):
return det['segmentation']
def _calculate_area_for_ann(self, ann):
import pycocotools.mask as cocomask
return cocomask.area(ann["segmentation"])
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _is_exemplar_guided(self):
exemplar_guided = self.config['EXEMPLAR_GUIDED']
return exemplar_guided
def _postproc_ground_truth_data(self, data):
return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()
def _postproc_prediction_data(self, data):
return PredictionBURSTFormatToTAOFormatConverter(
self.gt_data, data,
exemplar_guided=self._is_exemplar_guided()).convert()

View File

@@ -0,0 +1,7 @@
The track ids in both ground truth and predictions are not globally unique, but
start from 1 for each video. At the moment when converting from Ali format to
TAO format, we remap the ids to be globally unique. It would be better to
directly have this in the data though.
Improve setting of the EXEMPLAR_GUIDED flag; maybe this can be done automatically.

View File

@@ -0,0 +1,591 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from trackeval.utils import TrackEvalException
from trackeval.datasets._base_dataset import _BaseDataset
from trackeval import utils
from trackeval import _timing
class BURSTBase(_BaseDataset):
"""Dataset class for TAO tracking"""
def _postproc_ground_truth_data(self, data):
return data
def _postproc_prediction_data(self, data):
return data
def _iou_type(self):
return 'bbox'
def _box_or_mask_from_det(self, det):
return np.atleast_1d(det['bbox'])
def _calculate_area_for_ann(self, ann):
return ann["bbox"][2] * ann["bbox"][3]
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'EXEMPLAR_GUIDED': False,
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = self._postproc_ground_truth_data(json.load(f))
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# Only classes with ground truth are evaluated in TAO; we also don't evaluate distractors.
distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if (cls['id'] in seen_cats) and (cls['id'] not in distractors)]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
self.class_list = [cls for cls in self.valid_classes]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = self._postproc_prediction_data(json.load(f))
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
+ self.seq_to_classes[seq_id]['neg_cat_ids']
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
if cls in classes_to_consider else [] for cls in all_classes}
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
if not self.config['EXEMPLAR_GUIDED']:
# Match tracker and gt dets (with hungarian algorithm).
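# Scores below a 0.5 IoU are zeroed first, so the assignment can only match pairs with
# sufficient overlap; tracker detections left unmatched afterwards become candidates
# for removal in the branch below.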
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
else:
data['tracker_ids'][t] = tracker_ids
data['tracker_dets'][t] = tracker_dets
data['tracker_confidences'][t] = tracker_confidences
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
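# gt_id_map / tracker_id_map are lookup arrays indexed by the original id: unused slots
# stay NaN, used slots hold the new contiguous id assigned via np.arange below.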
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = self._iou_type()
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = self._calculate_area_for_ann(ann)
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann["category_id"],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute the average area over the track's annotations
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
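# e.g. a video whose images have frame_index 0, 5 and 10 maps those images to timesteps 0, 1 and 2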
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled in place
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
#print('true')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
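# Minimal illustrative sketch (not part of TrackEval itself) of the TAO-style ground
# truth records consumed by the loader above. Field names follow the code in this file;
# the ids and bbox values are invented purely for illustration.
if __name__ == '__main__':
    toy_gt = {
        'videos': [{'id': 1, 'name': 'val/seq-0001', 'neg_category_ids': [],
                    'not_exhaustive_category_ids': []}],
        'images': [{'id': 10, 'video_id': 1, 'frame_index': 0},
                   {'id': 11, 'video_id': 1, 'frame_index': 1}],
        'annotations': [{'image_id': 10, 'video_id': 1, 'track_id': 5,
                         'category_id': 1, 'bbox': [100, 50, 40, 80]}],
        'tracks': [{'id': 5, 'video_id': 1, 'category_id': 1}],
        'categories': [{'id': 1, 'name': 'person'}],
    }
    # Sequence lengths are simply the number of annotated images per video, exactly as
    # computed in __init__ above.
    seq_lengths = {}
    for img in toy_gt['images']:
        seq_lengths[img['video_id']] = seq_lengths.get(img['video_id'], 0) + 1
    print(seq_lengths)  # -> {1: 2}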

View File

@@ -0,0 +1,675 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from trackeval.utils import TrackEvalException
from trackeval.datasets._base_dataset import _BaseDataset
from trackeval import utils
from trackeval import _timing
class BURST_OW_Base(_BaseDataset):
"""Dataset class for TAO tracking"""
def _postproc_ground_truth_data(self, data):
return data
def _postproc_prediction_data(self, data):
return data
def _iou_type(self):
return 'bbox'
def _box_or_mask_from_det(self, det):
return np.atleast_1d(det['bbox'])
def _calculate_area_for_ann(self, ann):
return ann["bbox"][2] * ann["bbox"][3]
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'SUBSET': 'all'
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = self._postproc_ground_truth_data(json.load(f))
self.subset = self.config['SUBSET']
if self.subset != 'all':
# Split GT data into `known`, `unknown` or `distractor`
self._split_known_unknown_distractor()
self.gt_data = self._filter_gt_data(self.gt_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
# cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
# self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
# for cls in self.config['CLASSES_TO_EVAL']]
self.class_list = ["object"] # class-agnostic
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
# self.class_list = [cls for cls in self.valid_classes]
self.class_list = ["object"] # class-agnostic
# self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
self.class_name_to_class_id = {"object": 1} # class-agnostic
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = self._postproc_prediction_data(json.load(f))
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int) # class-agnostic
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
# all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
all_classes = [1] # class-agnostic
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
# classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
# + self.seq_to_classes[seq_id]['neg_cat_ids']
classes_to_consider = all_classes # class-agnostic
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
# classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
# if cls in classes_to_consider else [] for cls in all_classes}
classes_to_tracks = {cls: [track for track in all_tracks]
if cls in classes_to_consider else [] for cls in all_classes} # class-agnostic
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = self._iou_type()
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = self._calculate_area_for_ann(ann)
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann["category_id"],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute the average area over the track's annotations
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled in place
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
#print('true')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
def _split_known_unknown_distractor(self):
all_ids = set(range(1, 2000))  # 2000 is larger than the max category id in TAO-OW.
# `knowns` includes 78 TAO_category_ids that correspond to 78 COCO classes.
# (The other 2 COCO classes do not have corresponding classes in TAO).
self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
# `distractors` is defined as in the paper "Opening up Open-World Tracking"
self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
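# knowns, distractors and unknowns together partition the TAO-OW category ids 1..1999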
def _filter_gt_data(self, raw_gt_data):
"""
Filter out irrelevant data in the raw_gt_data
Args:
raw_gt_data: directly loaded from json.
Returns:
filtered gt_data
"""
valid_cat_ids = list()
if self.subset == "known":
valid_cat_ids = self.knowns
elif self.subset == "distractor":
valid_cat_ids = self.distractors
elif self.subset == "unknown":
valid_cat_ids = self.unknowns
# elif self.subset == "test_only_unknowns":
# valid_cat_ids = test_only_unknowns
else:
raise Exception("The parameter `SUBSET` is incorrect")
filtered = dict()
filtered["videos"] = raw_gt_data["videos"]
# filtered["videos"] = list()
unwanted_vid = set()
# for video in raw_gt_data["videos"]:
# datasrc = video["name"].split('/')[1]
# if datasrc in data_srcs:
# filtered["videos"].append(video)
# else:
# unwanted_vid.add(video["id"])
filtered["annotations"] = list()
for ann in raw_gt_data["annotations"]:
if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
filtered["annotations"].append(ann)
filtered["tracks"] = list()
for track in raw_gt_data["tracks"]:
if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
filtered["tracks"].append(track)
filtered["images"] = list()
for image in raw_gt_data["images"]:
if image["video_id"] not in unwanted_vid:
filtered["images"].append(image)
filtered["categories"] = list()
for cat in raw_gt_data["categories"]:
if cat["id"] in valid_cat_ids:
filtered["categories"].append(cat)
if "info" in raw_gt_data:
filtered["info"] = raw_gt_data["info"]
if "licenses" in raw_gt_data:
filtered["licenses"] = raw_gt_data["licenses"]
if "track_id_offsets" in raw_gt_data:
filtered["track_id_offsets"] = raw_gt_data["track_id_offsets"]
if "split" in raw_gt_data:
filtered["split"] = raw_gt_data["split"]
return filtered
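# Illustrative usage sketch (not part of the original file): the static helper
# _make_track_ids_unique can be exercised on toy annotations to show how a track id
# that is reused across two different videos is split into fresh, globally unique ids.
if __name__ == '__main__':
    toy_anns = [
        {'track_id': 1, 'video_id': 'vid_a'},
        {'track_id': 1, 'video_id': 'vid_b'},  # same local id, different video
        {'track_id': 2, 'video_id': 'vid_a'},
    ]
    n_updated = BURST_OW_Base._make_track_ids_unique(toy_anns)
    print(n_updated)                          # -> 1 (one id had to be re-assigned)
    print([a['track_id'] for a in toy_anns])  # -> [3, 4, 2]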

View File

@@ -0,0 +1,39 @@
import json
import argparse
from .format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
def main(args):
with open(args.gt_input_file) as f:
ali_format_gt = json.load(f)
tao_format_gt = GroundTruthBURSTFormatToTAOFormatConverter(
ali_format_gt, args.split).convert()
with open(args.gt_output_file, 'w') as f:
json.dump(tao_format_gt, f)
if args.pred_input_file is None:
return
with open(args.pred_input_file) as f:
ali_format_pred = json.load(f)
tao_format_pred = PredictionBURSTFormatToTAOFormatConverter(
tao_format_gt, ali_format_pred, args.split,
args.exemplar_guided).convert()
with open(args.pred_output_file, 'w') as f:
json.dump(tao_format_pred, f)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--gt_input_file', type=str,
default='../data/gt/tsunami/exemplar_guided/validation_all_annotations.json')
parser.add_argument('--gt_output_file', type=str,
default='/tmp/val_gt.json')
parser.add_argument('--pred_input_file', type=str,
default='../data/trackers/tsunami/exemplar_guided/STCN_off_the_shelf/data/results.json')
parser.add_argument('--pred_output_file', type=str,
default='/tmp/pred.json')
parser.add_argument('--split', type=str, default='validation')
parser.add_argument('--exemplar_guided', type=bool, default=True)
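# NOTE: argparse's type=bool treats any non-empty string (including 'False') as True,
# so this flag can effectively only be disabled by passing an empty string.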
args_ = parser.parse_args()
main(args_)

View File

@@ -0,0 +1,259 @@
import os
import json
import pycocotools.mask as cocomask
from tabulate import tabulate
from typing import Union
def _global_track_id(*, local_track_id: Union[str, int],
video_id: Union[str, int],
track_id_mapping) -> int:
# remap local track ids into globally unique ids
return track_id_mapping[str(video_id)][str(local_track_id)]
class GroundTruthBURSTFormatToTAOFormatConverter:
def __init__(self, ali_format):
self._ali_format = ali_format
self._split = ali_format['split']
self._categories = self._make_categories()
self._videos = []
self._annotations = []
self._tracks = {}
self._images = []
self._next_img_id = 0
self._next_ann_id = 0
self._track_id_mapping = self._load_track_id_mapping()
for seq in ali_format['sequences']:
self._visit_seq(seq)
def _load_track_id_mapping(self):
id_map = {}
next_global_track_id = 1
for seq in self._ali_format['sequences']:
seq_id = seq['id']
seq_id_map = {}
id_map[str(seq_id)] = seq_id_map
for local_track_id in seq['track_category_ids']:
seq_id_map[str(local_track_id)] = next_global_track_id
next_global_track_id += 1
return id_map
def global_track_id(self, *, local_track_id: Union[str, int],
video_id: Union[str, int]) -> int:
return _global_track_id(local_track_id=local_track_id,
video_id=video_id,
track_id_mapping=self._track_id_mapping)
def _visit_seq(self, seq):
self._make_video(seq)
imgs = self._make_images(seq)
self._make_annotations_and_tracks(seq, imgs)
def _make_images(self, seq):
imgs = []
for img_path in seq['annotated_image_paths']:
video = self._split + '/' + seq['dataset'] + '/' + seq['seq_name']
file_name = video + '/' + img_path
# TODO: once python 3.9 is more common, we can use this nicer and safer code
#stripped = img_path.removesuffix('.jpg').removesuffix('.png').removeprefix('frame')
stripped = img_path.replace('.jpg', '').replace('.png', '').replace('frame', '')
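# e.g. 'frame0481.jpg' -> '0481' -> 481, or 'seq_000123.png' -> '000123' -> 123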
last = stripped.split('_')[-1]
frame_idx = int(last)
img = {'id': self._next_img_id, 'video': video,
'width': seq['width'], 'height': seq['height'],
'file_name': file_name,
'frame_index': frame_idx,
'video_id': seq['id']}
self._next_img_id += 1
self._images.append(img)
imgs.append(img)
return imgs
def _make_video(self, seq):
video_id = seq['id']
dataset = seq['dataset']
seq_name = seq['seq_name']
name = f'{self._split}/' + dataset + '/' + seq_name
video = {
'id': video_id, 'width': seq['width'], 'height': seq['height'],
'neg_category_ids': seq['neg_category_ids'],
'not_exhaustive_category_ids': seq['not_exhaustive_category_ids'],
'name': name, 'metadata': {'dataset': dataset}}
self._videos.append(video)
def _make_annotations_and_tracks(self, seq, imgs):
video_id = seq['id']
segs = seq['segmentations']
assert len(segs) == len(imgs), (len(segs), len(imgs))
for frame_segs, img in zip(segs, imgs):
for local_track_id, seg in frame_segs.items():
distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
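# same distractor category ids as used by the dataset classes above; annotations whose
# category is one of these are skipped entirely below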
global_track_id = self.global_track_id(
local_track_id=local_track_id, video_id=seq['id'])
rle = seg['rle']
segmentation = {'counts': rle,
'size': [img['height'], img['width']]}
image_id = img['id']
category_id = int(seq['track_category_ids'][local_track_id])
if category_id in distractors:
continue
coco_bbox = cocomask.toBbox(segmentation)
bbox = [int(x) for x in coco_bbox]
ann = {'segmentation': segmentation, 'id': self._next_ann_id,
'image_id': image_id, 'category_id': category_id,
'track_id': global_track_id, 'video_id': video_id,
'bbox': bbox}
self._next_ann_id += 1
self._annotations.append(ann)
if global_track_id not in self._tracks:
track = {'id': global_track_id, 'category_id': category_id,
'video_id': video_id}
self._tracks[global_track_id] = track
def convert(self):
tracks = sorted(self._tracks.values(), key=lambda t: t['id'])
return {'videos': self._videos, 'annotations': self._annotations,
'tracks': tracks, 'images': self._images,
'categories': self._categories,
'track_id_mapping': self._track_id_mapping,
'split': self._split}
def _make_categories(self):
tao_categories_path = os.path.join(os.path.dirname(__file__), 'tao_categories.json')
with open(tao_categories_path) as f:
return json.load(f)
class PredictionBURSTFormatToTAOFormatConverter:
def __init__(self, gt, ali_format, exemplar_guided):
self._gt = gt
self._ali_format = ali_format
if 'split' in ali_format:
self._split = ali_format['split']
gt_split = self._gt['split']
assert self._split == gt_split, (self._split, gt_split)
else:
self._split = self._gt['split']
self._exemplar_guided = exemplar_guided
self._result = []
self._next_det_id = 0
self._img_by_filename = {}
for img in self._gt['images']:
file_name = img['file_name']
assert file_name not in self._img_by_filename
self._img_by_filename[file_name] = img
self._gt_track_by_track_id = {}
for track in self._gt['tracks']:
self._gt_track_by_track_id[int(track['id'])] = track
self._filtered_out_track_ids = set()
for seq in ali_format['sequences']:
self._visit_seq(seq)
if exemplar_guided and len(self._filtered_out_track_ids) > 0:
self.print_filter_out_debug_info(ali_format)
def print_filter_out_debug_info(self, ali_format):
track_ids_in_pred = set()
a_dict_for_debugging = {}
for seq in ali_format['sequences']:
for local_track_id in seq['track_category_ids']:
global_track_id = _global_track_id(
local_track_id=local_track_id, video_id=seq['id'],
track_id_mapping=self._gt['track_id_mapping'])
track_ids_in_pred.add(global_track_id)
a_dict_for_debugging[global_track_id] = {'seq': seq,
'local_track_id': local_track_id}
print('Number of Track ids in pred:', len(track_ids_in_pred))
print('Exemplar Guided: Filtered out',
len(self._filtered_out_track_ids),
'tracks which were not found in the ground truth.')
track_ids_after_filtering = set(d['track_id'] for d in self._result)
print('Number of tracks after filtering:',
len(track_ids_after_filtering))
problem_tracks = list(
track_ids_in_pred - track_ids_after_filtering - self._filtered_out_track_ids)
if len(problem_tracks) > 0:
print("\nWARNING:", len(problem_tracks),
"object tracks are not present. There could be a number of reasons for this:\n"
"(1) If you are running evaluation for the box/point exemplar-guided task then this is to be expected"
" because your tracker probably didn't predict masks for every ground-truth object instance.\n"
"(2) If you are running evaluation for the mask exemplar-guided task, then this could indicate a "
"problem. Assume that you copied the given first-frame object mask to your predicted result, this "
"should not happen. It could be that your predictions are at the wrong frame-rate i.e. you have no "
"predicted masks for video frames which will be evaluated.\n")
rows = []
for xx in problem_tracks:
rows.append([a_dict_for_debugging[xx]['seq']['dataset'],
a_dict_for_debugging[xx]['seq']['seq_name'],
a_dict_for_debugging[xx]['local_track_id']])
print("For your reference, the sequence name and track IDs for these missing tracks are:")
print(tabulate(rows, ["Dataset", "Sequence Name", "Track ID"]))
def _visit_seq(self, seq):
dataset = seq['dataset']
seq_name = seq['seq_name']
assert len(seq['segmentations']) == len(seq['annotated_image_paths'])
for frame_segs, img_path in zip(seq['segmentations'],
seq['annotated_image_paths']):
for local_track_id_str, track_det in frame_segs.items():
rle = track_det['rle']
file_name = self._split + '/' + dataset + '/' + seq_name + '/' + img_path
# the result might have a higher frame rate than the ground truth
if file_name not in self._img_by_filename:
continue
img = self._img_by_filename[file_name]
img_id = img['id']
height = img['height']
width = img['width']
segmentation = {'counts': rle, 'size': [height, width]}
local_track_id = int(local_track_id_str)
if self._exemplar_guided:
global_track_id = _global_track_id(
local_track_id=local_track_id, video_id=seq['id'],
track_id_mapping=self._gt['track_id_mapping'])
else:
global_track_id = local_track_id
coco_bbox = cocomask.toBbox(segmentation)
bbox = [int(x) for x in coco_bbox]
det = {'id': self._next_det_id, 'image_id': img_id,
'track_id': global_track_id, 'bbox': bbox,
'segmentation': segmentation}
if self._exemplar_guided:
if global_track_id not in self._gt_track_by_track_id:
self._filtered_out_track_ids.add(global_track_id)
continue
gt_track = self._gt_track_by_track_id[global_track_id]
category_id = gt_track['category_id']
det['category_id'] = category_id
elif 'category_id' in track_det:
det['category_id'] = track_det['category_id']
else:
category_id = seq['track_category_ids'][local_track_id_str]
det['category_id'] = category_id
self._next_det_id += 1
if 'score' in track_det:
det['score'] = track_det['score']
else:
det['score'] = 1.0
self._result.append(det)
def convert(self):
return self._result
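# --- Illustrative usage sketch (not part of the original file). It assumes the
# methods above belong to PredictionBURSTFormatToTAOFormatConverter and that
# GroundTruthBURSTFormatToTAOFormatConverter is defined in this module (both are
# imported from here by burst_ow.py). The json paths are hypothetical; the call
# pattern mirrors the one used in burst_ow.py: convert the BURST ground truth to
# TAO format first, then feed it to the prediction converter.
if __name__ == '__main__':
    import json

    with open('burst_gt_val.json') as f:             # hypothetical path
        burst_gt = json.load(f)
    with open('my_tracker_predictions.json') as f:   # hypothetical path
        burst_pred = json.load(f)

    tao_gt = GroundTruthBURSTFormatToTAOFormatConverter(burst_gt).convert()
    converter = PredictionBURSTFormatToTAOFormatConverter(
        tao_gt, burst_pred, exemplar_guided=False)
    tao_dets = converter.convert()                    # list of TAO-style detection dicts
    print('Converted %d detections' % len(tao_dets))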

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,91 @@
import json
import os
from .burst_helpers.burst_ow_base import BURST_OW_Base
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils
class BURST_OW(BURST_OW_Base):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
tao_config = BURST_OW_Base.get_default_dataset_config()
code_path = utils.get_code_path()
tao_config['GT_FOLDER'] = os.path.join(
code_path, 'data/gt/burst/all_classes/val/') # Location of GT data
tao_config['TRACKERS_FOLDER'] = os.path.join(
code_path, 'data/trackers/burst/open-world/val/') # Trackers location
return tao_config
def _iou_type(self):
return 'mask'
def _box_or_mask_from_det(self, det):
if "segmentation" in det:
return det["segmentation"]
else:
return det["mask"]
def _calculate_area_for_ann(self, ann):
import pycocotools.mask as cocomask
seg = self._box_or_mask_from_det(ann)
return cocomask.area(seg)
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _postproc_ground_truth_data(self, data):
return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()
def _postproc_prediction_data(self, data):
# if it's a list, it's already in TAO format and not in Ali format
# however the image ids do not match and need to be remapped
if isinstance(data, list):
_remap_image_ids(data, self.gt_data)
return data
return PredictionBURSTFormatToTAOFormatConverter(
self.gt_data, data,
exemplar_guided=False).convert()
def _remap_image_ids(pred_data, ali_gt_data):
code_path = utils.get_code_path()
if 'split' in ali_gt_data:
split = ali_gt_data['split']
else:
split = 'val'
if split in ('val', 'validation'):
tao_gt_path = os.path.join(
code_path, 'data/gt/tao/tao_validation/gt.json')
else:
tao_gt_path = os.path.join(
code_path, 'data/gt/tao/tao_test/test_without_annotations.json')
with open(tao_gt_path) as f:
tao_gt = json.load(f)
tao_img_by_id = {}
for img in tao_gt['images']:
img_id = img['id']
tao_img_by_id[img_id] = img
ali_img_id_by_filename = {}
for ali_img in ali_gt_data['images']:
ali_img_id = ali_img['id']
file_name = ali_img['file_name'].replace("validation", "val")
ali_img_id_by_filename[file_name] = ali_img_id
ali_img_id_by_tao_img_id = {}
for tao_img_id, tao_img in tao_img_by_id.items():
file_name = tao_img['file_name']
ali_img_id = ali_img_id_by_filename[file_name]
ali_img_id_by_tao_img_id[tao_img_id] = ali_img_id
for det in pred_data:
tao_img_id = det['image_id']
ali_img_id = ali_img_id_by_tao_img_id[tao_img_id]
det['image_id'] = ali_img_id
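# --- Illustrative usage sketch (not part of the original file). Shows how the
# defaults returned by get_default_dataset_config() above could be overridden
# before constructing the dataset. The folder paths are hypothetical, and the
# constructor call assumes BURST_OW_Base follows the usual TrackEval convention
# of accepting a config dict in __init__ (as the other dataset classes do).
if __name__ == '__main__':
    config = BURST_OW.get_default_dataset_config()
    config['GT_FOLDER'] = '/data/gt/burst/all_classes/val/'             # hypothetical path
    config['TRACKERS_FOLDER'] = '/data/trackers/burst/open-world/val/'  # hypothetical path
    dataset = BURST_OW(config)
    print('Initialised dataset:', dataset.get_name())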

View File

@@ -0,0 +1,276 @@
import os
import csv
import numpy as np
from ._base_dataset import _BaseDataset
from ..utils import TrackEvalException
from .. import utils
from .. import _timing
class DAVIS(_BaseDataset):
"""Dataset class for DAVIS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/davis/davis_unsupervised_val/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/davis/davis_unsupervised_val/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'SPLIT_TO_EVAL': 'val', # Valid: 'val', 'train'
'CLASSES_TO_EVAL': ['general'],
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FILE': None, # Specify seqmap file
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
# '{gt_folder}/Annotations_unsupervised/480p/{seq}'
'MAX_DETECTIONS': 0 # Maximum number of allowed detections per sequence (0 for no threshold)
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
# defining a default class since there are no classes in DAVIS
self.should_classes_combine = False
self.use_super_categories = False
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.config['TRACKERS_FOLDER']
self.max_det = self.config['MAX_DETECTIONS']
# Get classes to eval
self.valid_classes = ['general']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only general class is valid.')
# Get sequences to eval
if self.config["SEQ_INFO"]:
self.seq_list = list(self.config["SEQ_INFO"].keys())
self.seq_lengths = self.config["SEQ_INFO"]
elif self.config["SEQMAP_FILE"]:
self.seq_list = []
seqmap_file = self.config["SEQMAP_FILE"]
if not os.path.isfile(seqmap_file):
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if row[0] == '':
continue
seq = row[0]
self.seq_list.append(seq)
else:
self.seq_list = os.listdir(self.gt_fol)
self.seq_lengths = {seq: len(os.listdir(os.path.join(self.gt_fol, seq))) for seq in self.seq_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
for tracker in self.tracker_list:
for seq in self.seq_list:
curr_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)
if not os.path.isdir(curr_dir):
print('Tracker directory not found: ' + curr_dir)
raise TrackEvalException('Tracker directory not found: ' +
os.path.join(tracker, self.tracker_sub_fol, seq))
tr_timesteps = len(os.listdir(curr_dir))
if self.seq_lengths[seq] != tr_timesteps:
raise TrackEvalException('GT folder and tracker folder have a different number of '
'timesteps for tracker %s and sequence %s' % (tracker, seq))
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the DAVIS format
If is_gt, this returns a dict which contains the fields:
[gt_ids] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[masks_void]: list of masks with void pixels (pixels to be ignored during evaluation)
if not is_gt, this returns a dict which contains the fields:
[tracker_ids] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
from PIL import Image
# File location
if is_gt:
seq_dir = os.path.join(self.gt_fol, seq)
else:
seq_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'dets', 'masks_void']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# read frames
frames = [os.path.join(seq_dir, im_name) for im_name in sorted(os.listdir(seq_dir))]
id_list = []
for t in range(num_timesteps):
frame = np.array(Image.open(frames[t]))
if is_gt:
void = frame == 255
frame[void] = 0
raw_data['masks_void'][t] = mask_utils.encode(np.asfortranarray(void.astype(np.uint8)))
id_values = np.unique(frame)
id_values = id_values[id_values != 0]
id_list += list(id_values)
tmp = np.ones((len(id_values), *frame.shape))
tmp = tmp * id_values[:, None, None]
masks = np.array(tmp == frame[None, ...]).astype(np.uint8)
raw_data['dets'][t] = mask_utils.encode(np.array(np.transpose(masks, (1, 2, 0)), order='F'))
raw_data['ids'][t] = id_values.astype(int)
num_objects = len(np.unique(id_list))
if not is_gt and num_objects > self.max_det > 0:
raise Exception('Number of proposals (%i) for sequence %s exceeds number of maximum allowed proposals (%i).'
% (num_objects, seq, self.max_det))
if is_gt:
key_map = {'ids': 'gt_ids',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data["num_timesteps"] = num_timesteps
raw_data['mask_shape'] = np.array(Image.open(frames[0])).shape
if is_gt:
raw_data['num_gt_ids'] = num_objects
else:
raw_data['num_tracker_ids'] = num_objects
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
DAVIS:
In DAVIS, the 4 preproc steps are as follows:
1) There are no classes, all detections are evaluated jointly
2) No matched tracker detections are removed.
3) No unmatched tracker detections are removed.
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
Preprocessing special to DAVIS: Pixels which are marked as void in the ground truth are set to zero in the
tracker detections since they are not considered during evaluation.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
num_gt_dets = 0
num_tracker_dets = 0
unique_gt_ids = []
unique_tracker_ids = []
num_timesteps = raw_data['num_timesteps']
# count detections
for t in range(num_timesteps):
num_gt_dets += len(raw_data['gt_dets'][t])
num_tracker_dets += len(raw_data['tracker_dets'][t])
unique_gt_ids += list(np.unique(raw_data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(raw_data['tracker_ids'][t]))
data['gt_ids'] = raw_data['gt_ids']
data['gt_dets'] = raw_data['gt_dets']
data['similarity_scores'] = raw_data['similarity_scores']
data['tracker_ids'] = raw_data['tracker_ids']
# set void pixels in tracker detections to zero
for t in range(num_timesteps):
void_mask = raw_data['masks_void'][t]
if mask_utils.area(void_mask) > 0:
void_mask_ious = np.atleast_1d(mask_utils.iou(raw_data['tracker_dets'][t], [void_mask], [False]))
if void_mask_ious.any():
rows, columns = np.where(void_mask_ious > 0)
for r in rows:
det = mask_utils.decode(raw_data['tracker_dets'][t][r])
void = mask_utils.decode(void_mask).astype(np.bool)
det[void] = 0
det = mask_utils.encode(np.array(det, order='F').astype(np.uint8))
raw_data['tracker_dets'][t][r] = det
data['tracker_dets'] = raw_data['tracker_dets']
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = raw_data['num_tracker_ids']
data['num_gt_ids'] = raw_data['num_gt_ids']
data['mask_shape'] = raw_data['mask_shape']
data['num_timesteps'] = num_timesteps
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
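# --- Illustrative sketch (not part of the original file). It mirrors the
# per-frame mask handling in _load_raw_file above: a DAVIS annotation PNG stores
# one object id per pixel (0 = background, 255 = void), and each id is turned
# into a separate COCO RLE-encoded binary mask. The PNG path is hypothetical.
if __name__ == '__main__':
    import numpy as np
    from PIL import Image
    from pycocotools import mask as mask_utils

    frame = np.array(Image.open('00000.png'))   # hypothetical annotation frame
    void = frame == 255                          # void pixels are ignored during evaluation
    frame[void] = 0
    id_values = np.unique(frame)
    id_values = id_values[id_values != 0]

    # One binary mask per object id, stacked to HxWxN and encoded in Fortran order.
    masks = (id_values[:, None, None] == frame[None, ...]).astype(np.uint8)
    rles = mask_utils.encode(np.array(np.transpose(masks, (1, 2, 0)), order='F'))
    print('frame contains %d objects' % len(rles))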

View File

@@ -0,0 +1,459 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class HeadTrackingChallenge(_BaseDataset):
"""Dataset class for Head Tracking Challenge - 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'HT', # Valid: 'HT'. Refers to the Head Tracking benchmark (the CroHD dataset)
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'static': 2, 'ignore': 3, 'person_on_vehicle': 4}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If sequence length is 'None' tries to read sequence length from .ini files.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
if self.benchmark == 'HT':
data_keys += ['visibility']
data_keys += ['gt_conf']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=np.float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['gt_conf'][t] = np.atleast_1d(time_data[:, 6]).astype(float)
raw_data['visibility'][t] = np.atleast_1d(time_data[:, 8]).astype(float)
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except pedestrian are removed, also removes pedestrian gt dets marked with zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
# distractor class ids (see class_name_to_class_id above): 'static': 2, 'ignore': 3, 'person_on_vehicle': 4
distractor_class_names = ['static', 'ignore', 'person_on_vehicle']
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences',
'similarity_scores', 'gt_visibility']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_visibility = raw_data['visibility'][t]
gt_conf = raw_data['gt_conf'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], np.int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.4 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.logical_not(np.isin(gt_classes[match_rows], cls_id))
if self.benchmark == 'HT':
is_invisible_class = gt_visibility[match_rows] < np.finfo('float').eps
low_conf_class = gt_conf[match_rows] < np.finfo('float').eps
are_distractors = np.logical_or(np.logical_or(is_invisible_class, is_distractor_class), low_conf_class)  # combine all three removal conditions
to_remove_tracker = match_cols[are_distractors]
else:
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
if self.do_preproc and self.benchmark == 'HT':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id)) & \
(gt_visibility > 0.) & \
(gt_conf > 0.)
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
data['gt_visibility'][t] = gt_visibility # No mask!
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
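# --- Illustrative sketch (not part of the original file). It distils the
# matching step used in get_preprocessed_seq_data above: gt and tracker boxes
# are matched with the Hungarian algorithm on IoU, scores below the 0.4
# threshold are zeroed first, and tracker detections matched to distractor gt
# are marked for removal. The input values are toy numbers, not real data.
if __name__ == '__main__':
    import numpy as np
    from scipy.optimize import linear_sum_assignment

    similarity_scores = np.array([[0.9, 0.1],
                                  [0.2, 0.5]])   # rows: gt dets, cols: tracker dets
    gt_is_distractor = np.array([False, True])   # e.g. second gt is of class 'static'

    matching_scores = similarity_scores.copy()
    matching_scores[matching_scores < 0.4 - np.finfo('float').eps] = 0
    match_rows, match_cols = linear_sum_assignment(-matching_scores)
    actually_matched = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
    match_rows = match_rows[actually_matched]
    match_cols = match_cols[actually_matched]

    to_remove_tracker = match_cols[gt_is_distractor[match_rows]]
    print('tracker detections removed as distractor matches:', to_remove_tracker)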

View File

@@ -0,0 +1,389 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from ..utils import TrackEvalException
from .. import _timing
class Kitti2DBox(_BaseDataset):
"""Dataset class for KITTI 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_2d_box_train'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_2d_box_train/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['car', 'pedestrian'], # Valid: ['car', 'pedestrian']
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val', 'training_minus_val', 'test'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.max_occlusion = 2
self.max_truncation = 0
self.min_height = 25
# Get classes to eval
self.valid_classes = ['car', 'pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [car, pedestrian] are valid.')
self.class_name_to_class_id = {'car': 1, 'van': 2, 'truck': 3, 'pedestrian': 4, 'person': 5, # person sitting
'cyclist': 6, 'tram': 7, 'misc': 8, 'dontcare': 9, 'car_2': 1}
# Get sequences to eval and check gt files exist
self.seq_list = []
self.seq_lengths = {}
seqmap_name = 'evaluate_tracking.seqmap.' + self.config['SPLIT_TO_EVAL']
seqmap_file = os.path.join(self.gt_fol, seqmap_name)
if not os.path.isfile(seqmap_file):
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.read(1024))
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
if len(row) >= 4:
seq = row[0]
self.seq_list.append(seq)
self.seq_lengths[seq] = int(row[3])
if not self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the kitti 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['dontcare']}
else:
crowd_ignore_filter = None
# Valid classes
valid_filter = {2: [x for x in self.class_list]}
if is_gt:
if 'car' in self.class_list:
valid_filter[2].append('van')
if 'pedestrian' in self.class_list:
valid_filter[2] += ['person']
# Convert kitti class strings to class ids
convert_filter = {2: self.class_name_to_class_id}
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, time_col=0, id_col=1, remove_negative_ids=True,
valid_filter=valid_filter,
crowd_ignore_filter=crowd_ignore_filter,
convert_filter=convert_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t)
if time_key in read_data.keys():
time_data = np.asarray(read_data[time_key], dtype=np.float)
raw_data['dets'][t] = np.atleast_2d(time_data[:, 6:10])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
raw_data['classes'][t] = np.atleast_1d(time_data[:, 2]).astype(int)
if is_gt:
gt_extras_dict = {'truncation': np.atleast_1d(time_data[:, 3].astype(int)),
'occlusion': np.atleast_1d(time_data[:, 4].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
if time_data.shape[1] > 17:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 17])
else:
raw_data['tracker_confidences'][t] = np.ones(time_data.shape[0])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'truncation': np.empty(0),
'occlusion': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
if time_key in ignore_data.keys():
time_ignore = np.asarray(ignore_data[time_key], dtype=np.float)
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 6:10])
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
KITTI:
In KITTI, the 4 preproc steps are as follows:
1) There are two classes (pedestrian and car) which are evaluated separately.
2) For the pedestrian class, the 'person' class (person sitting) is treated as a distractor.
For the car class, the 'van' class is treated as a distractor.
GT boxes marked as having occlusion level > 2 or truncation level > 0 are also treated as
distractors.
3) Crowd ignore regions are used to remove unmatched detections. Also unmatched detections with
height <= 25 pixels are removed.
4) Distractor gt dets (including truncated and occluded) are removed.
"""
if cls == 'pedestrian':
distractor_classes = [self.class_name_to_class_id['person']]
elif cls == 'car':
distractor_classes = [self.class_name_to_class_id['van']]
else:
raise (TrackEvalException('Class %s is not evaluatable' % cls))
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls + distractor classes)
gt_class_mask = np.sum([raw_data['gt_classes'][t] == c for c in [cls_id] + distractor_classes], axis=0)
gt_class_mask = gt_class_mask.astype(np.bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
gt_classes = raw_data['gt_classes'][t][gt_class_mask]
gt_occlusion = raw_data['gt_extras'][t]['occlusion'][gt_class_mask]
gt_truncation = raw_data['gt_extras'][t]['truncation'][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(np.bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as truncated, occluded, or belonging to a distractor class.
to_remove_matched = np.array([], np.int)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
is_occluded_or_truncated = np.logical_or(
gt_occlusion[match_rows] > self.max_occlusion + np.finfo('float').eps,
gt_truncation[match_rows] > self.max_truncation + np.finfo('float').eps)
to_remove_matched = np.logical_or(is_distractor_class, is_occluded_or_truncated)
to_remove_matched = match_cols[to_remove_matched]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, also remove those smaller than a minimum height.
unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
unmatched_heights = unmatched_tracker_dets[:, 3] - unmatched_tracker_dets[:, 1]
is_too_small = unmatched_heights <= self.min_height + np.finfo('float').eps
# For unmatched tracker dets, also remove those that are greater than 50% within a crowd ignore region.
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
box_format='x0y0x1y1', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove all unwanted tracker dets.
to_remove_unmatched = unmatched_indices[np.logical_or(is_too_small, is_within_crowd_ignore_region)]
to_remove_tracker = np.concatenate((to_remove_matched, to_remove_unmatched), axis=0)
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Also remove gt dets that were only useful for preprocessing and are not needed for evaluation.
# These are those that are occluded, truncated and from distractor objects.
gt_to_keep_mask = (np.less_equal(gt_occlusion, self.max_occlusion)) & \
(np.less_equal(gt_truncation, self.max_truncation)) & \
(np.equal(gt_classes, cls_id))
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
return similarity_scores
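# --- Illustrative sketch (not part of the original file). It mirrors the seqmap
# parsing in __init__ above: each row of 'evaluate_tracking.seqmap.<split>'
# lists a sequence name in its first column and the number of frames in its
# fourth column. The seqmap path below is hypothetical.
if __name__ == '__main__':
    import csv

    seq_lengths = {}
    with open('data/gt/kitti/kitti_2d_box_train/evaluate_tracking.seqmap.training') as fp:  # hypothetical path
        dialect = csv.Sniffer().sniff(fp.read(1024))
        fp.seek(0)
        for row in csv.reader(fp, dialect):
            if len(row) >= 4:
                seq_lengths[row[0]] = int(row[3])
    print('loaded %d sequences' % len(seq_lengths))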

View File

@@ -0,0 +1,426 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class KittiMOTS(_BaseDataset):
"""Dataset class for KITTI MOTS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_mots_val'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_mots_val'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['car', 'pedestrian'], # Valid: ['car', 'pedestrian']
'SPLIT_TO_EVAL': 'val', # Valid: 'training', 'val'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/split_to_eval.seqmap)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/label_02/{seq}.txt', # format of gt localization
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.split_to_eval = self.config['SPLIT_TO_EVAL']
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['car', 'pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. '
'Only classes [car, pedestrian] are valid.')
self.class_name_to_class_id = {'car': '1', 'pedestrian': '2', 'ignore': '10'}
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
seqmap_name = 'evaluate_mots.seqmap.' + self.config['SPLIT_TO_EVAL']
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], seqmap_name)
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], seqmap_name)
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.read(1024))
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
if len(row) >= 4:
seq = "%04d" % int(row[0])
seq_list.append(seq)
seq_lengths[seq] = int(row[3]) + 1
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the KITTI MOTS format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[gt_ignore_region]: list (for each timestep) of masks for the ignore regions
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['10']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_ignore_region']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t)
# list to collect all masks of a timestep to check for overlapping areas
all_masks = []
if time_key in read_data.keys():
try:
raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in read_data[time_key]]
raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
all_masks += raw_data['dets'][t]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['dets'][t] = []
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if time_key in ignore_data.keys():
try:
time_ignore = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in ignore_data[time_key]]
raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
intersect=False)
all_masks += [raw_data['gt_ignore_region'][t]]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)
# check for overlapping masks
if all_masks:
masks_merged = all_masks[0]
for mask in all_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
raise TrackEvalException(
'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
t))
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data["num_timesteps"] = num_timesteps
raw_data['seq'] = seq
return raw_data
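# Illustration (not part of the original file): _load_simple_text_file is called with
# force_delimiters=' ', so the parser above expects space-separated rows of the form
#
#     <timestep> <track_id> <class_id> <img_height> <img_width> <rle_counts>
#
# e.g. a hypothetical line "52 1005 1 375 1242 WSV:2d;1z..." becomes one entry of
# raw_data['dets'][52] with size [375, 1242] and the RLE string as 'counts', while gt rows
# whose class column is '10' are routed to the crowd-ignore regions by crowd_ignore_filter.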
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
KITTI MOTS:
In KITTI MOTS, the 4 preproc steps are as follows:
1) There are two classes (car and pedestrian) which are evaluated separately.
2) There are no ground truth detections marked as to be removed/distractor classes.
Therefore also no matched tracker detections are removed.
3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = int(self.class_name_to_class_id[cls])
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
ignore_region = raw_data['gt_ignore_region'][t]
intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
data['cls'] = cls
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
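# A minimal sketch (not part of the original file) of the ignore-region rule used above,
# assuming pycocotools is available; the mask objects are hypothetical placeholders:
#
#     from pycocotools import mask as mask_utils
#     ioa = mask_utils.area(mask_utils.merge([det, ignore], intersect=True)) / mask_utils.area(det)
#     drop = unmatched and ioa > 0.5
#
# i.e. only tracker masks left unmatched by the Hungarian assignment and with more than half
# of their own area inside the merged ignore region are removed; matched tracker masks and
# all gt masks are kept.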
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
@staticmethod
def _raise_index_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (tracker, seq))
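# Usage sketch (not part of the original file): these dataset classes are meant to be driven
# by TrackEval's Evaluator. Assuming the bundled package is importable as trackeval and the
# class above is exposed as trackeval.datasets.KittiMOTS (the exact export name is an
# assumption), an evaluation run looks roughly like:
#
#     import trackeval
#     dataset = trackeval.datasets.KittiMOTS()
#     metrics = [trackeval.metrics.HOTA(), trackeval.metrics.CLEAR()]
#     trackeval.Evaluator().evaluate([dataset], metrics)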

View File

@@ -0,0 +1,437 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class MotChallenge2DBox(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'MOT17', # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
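# Example (not part of the original file): callers only need to override the keys they care
# about; everything else is filled in from the defaults above via utils.init_config in
# __init__. The paths below are hypothetical placeholders:
#
#     dataset = MotChallenge2DBox({
#         'GT_FOLDER': '/data/MOT17/train',
#         'TRACKERS_FOLDER': './tracker/results',
#         'BENCHMARK': 'MOT17',
#         'SPLIT_TO_EVAL': 'train',
#         'SKIP_SPLIT_FOL': True,
#     })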
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
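# Illustration (not part of the original file): with SEQ_INFO unset, the method above reads a
# seqmap text file whose first row is skipped as a header and whose remaining non-empty rows
# each name one sequence; each sequence's length then comes from the seqLength key in the
# [Sequence] section of its seqinfo.ini. For a hypothetical seqmap listing only MOT17-02-SDP,
# whose seqinfo.ini holds seqLength=600, the result is seq_list == ['MOT17-02-SDP'] and
# seq_lengths == {'MOT17-02-SDP': 600}.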
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format; there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
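# Illustration (not part of the original file): the loader above reads the usual MOTChallenge
# text layout, where columns 2:6 hold the xywh box, column 6 holds the confidence (tracker
# files) or the zero-marked flag (gt files) and column 7 the class id. A gt line in the usual
# comma-separated format, e.g.
#
#     1,3,794,247,71,174,1,1,0.86
#
# becomes, at timestep 0, id 3, box [794, 247, 71, 174], zero_marked 1 and class 1 (pedestrian);
# the values themselves are hypothetical.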
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except the pedestrian class are removed; pedestrian gt dets marked as zero_marked are also removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], dtype=int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
# class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
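# Worked example (not part of the original file): similarity here is plain IoU over 'xywh'
# boxes, so two identical boxes score 1.0, while [0, 0, 10, 10] against [5, 0, 10, 10] overlap
# on a 5x10 strip: IoU = 50 / (100 + 100 - 50) ~= 0.33. Scores below 0.5 are later discarded
# by the matching step in get_preprocessed_seq_data above.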

View File

@@ -0,0 +1,446 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class MOTSChallenge(_BaseDataset):
"""Dataset class for MOTS Challenge tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/MOTS-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/MOTS-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/MOTS-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'MOTS-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = 'MOTS'
self.gt_set = self.benchmark + '-' + self.config['SPLIT_TO_EVAL']
if not self.config['SKIP_SPLIT_FOL']:
split_fol = self.gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': '2', 'ignore': '10'}
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOTS Challenge format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[gt_ignore_region]: list (for each timestep) of masks for the ignore regions
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['10']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_ignore_region']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
# list to collect all masks of a timestep to check for overlapping areas
all_masks = []
if time_key in read_data.keys():
try:
raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in read_data[time_key]]
raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
all_masks += raw_data['dets'][t]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['dets'][t] = []
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if time_key in ignore_data.keys():
try:
time_ignore = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in ignore_data[time_key]]
raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
intersect=False)
all_masks += [raw_data['gt_ignore_region'][t]]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)
# check for overlapping masks
if all_masks:
masks_merged = all_masks[0]
for mask in all_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
raise TrackEvalException(
'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
t))
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOTS Challenge:
In MOTS Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrians) to be evaluated.
2) There are no ground truth detections marked as to be removed/distractor classes.
Therefore also no matched tracker detections are removed.
3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = int(self.class_name_to_class_id[cls])
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
ignore_region = raw_data['gt_ignore_region'][t]
intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
@staticmethod
def _raise_index_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (tracker, seq))

View File

@@ -0,0 +1,452 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class PersonPath22(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/person_path_22/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/person_path_22/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'person_path_22', # Valid: 'person_path_22'
'SPLIT_TO_EVAL': 'test', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {7: ['13']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file, crowd_ignore_filter=crowd_ignore_filter)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough rows in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
if time_key in ignore_data.keys():
time_ignore = np.asarray(ignore_data[time_key], dtype=float)
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 2:6])
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except pedestrians are removed, as are pedestrian gt dets marked as zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], int)
if self.do_preproc and self.benchmark != 'MOT15' and (gt_ids.shape[0] > 0 or len(crowd_ignore_regions) > 0) and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# remove bounding boxes that overlap with crowd ignore region.
intersection_with_ignore_region = self._calculate_box_ious(tracker_dets, crowd_ignore_regions, box_format='xywh', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.95 + np.finfo('float').eps, axis=1)
to_remove_tracker = np.unique(np.concatenate([to_remove_tracker, np.where(is_within_crowd_ignore_region)[0]]))
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
# class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
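# --- Illustrative sketch, not part of the repository code ---
# The seqinfo.ini fallback used in _get_seq_info above reduces to a small configparser lookup.
# Everything below is hypothetical example data; a real run would call parser.read(ini_file)
# on the path built from the GT folder instead of read_string().
import configparser
example_ini = """
[Sequence]
name=MOT17-02-SDP
imDir=img1
frameRate=30
seqLength=600
imWidth=1920
imHeight=1080
"""
parser = configparser.ConfigParser()
parser.read_string(example_ini)  # real usage: parser.read(os.path.join(gt_fol, seq, 'seqinfo.ini'))
seq_length = int(parser['Sequence']['seqLength'])
print(seq_length)  # -> 600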

View File

@@ -0,0 +1,508 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from ..utils import TrackEvalException
from .. import _timing
from ..datasets.rob_mots_classmap import cls_id_to_name
class RobMOTS(_BaseDataset):
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'SUB_BENCHMARK': None, # REQUIRED. Sub-benchmark to eval. If None, then error.
# ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
'CLASSES_TO_EVAL': None, # List of classes to eval. If None, then it does all COCO classes.
'SPLIT_TO_EVAL': 'train', # valid: ['train', 'val', 'test']
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'OUTPUT_SUB_FOLDER': 'results', # Output files are saved in OUTPUT_FOLDER/DATA_LOC_FORMAT/OUTPUT_SUB_FOLDER
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/DATA_LOC_FORMAT/TRACKER_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/dataset_subfolder/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use SEQMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
'CLSMAP_FOLDER': None, # Where clsmaps are found (if None, GT_FOLDER/dataset_subfolder/clsmaps)
'CLSMAP_FILE': None, # Directly specify clsmap file (if none use CLSMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
}
return default_config
def __init__(self, config=None):
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config())
self.split = self.config['SPLIT_TO_EVAL']
valid_benchmarks = ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
self.box_gt_benchmarks = ['waymo', 'tao']
self.sub_benchmark = self.config['SUB_BENCHMARK']
if not self.sub_benchmark:
raise TrackEvalException('SUB_BENCHMARK config input is required (there is no default value). Only benchmarks ' +
', '.join(valid_benchmarks) + ' are valid.')
if self.sub_benchmark not in valid_benchmarks:
raise TrackEvalException('Attempted to evaluate an invalid benchmark: ' + self.sub_benchmark + '. Only benchmarks ' +
', '.join(valid_benchmarks) + ' are valid.')
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], self.config['SPLIT_TO_EVAL'])
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = os.path.join(self.config['OUTPUT_SUB_FOLDER'], self.sub_benchmark)
# Loops through all sub-benchmarks, and reads in seqmaps to gather info on all sequences to eval.
self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
valid_class_ids = np.atleast_1d(np.genfromtxt(os.path.join(self.gt_fol, self.split, self.sub_benchmark,
'clsmap.txt')))
valid_classes = [cls_id_to_name[int(x)] for x in valid_class_ids] + ['all']
self.valid_class_ids = valid_class_ids
self.class_name_to_class_id = {cls_name: cls_id for cls_id, cls_name in cls_id_to_name.items()}
self.class_name_to_class_id['all'] = -1
if not self.config['CLASSES_TO_EVAL']:
self.class_list = valid_classes
else:
self.class_list = [cls if cls in valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(valid_classes) + ' are valid.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('Tracker file not found: ' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq
+ '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + self.sub_benchmark + '/' + os.path.basename(curr_file))
def get_name(self):
return self.get_class_name() + '.' + self.sub_benchmark
def _get_seq_info(self):
self.seq_list = []
self.seq_lengths = {}
self.seq_sizes = {}
self.seq_ignore_class_ids = {}
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'seqmap.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.split + '.seqmap')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
fp.seek(0)
reader = csv.reader(fp, dialect)
for i, row in enumerate(reader):
if len(row) >= 4:
# first col: sequence, second col: sequence length, third and fourth col: sequence height/width
# The rest of the columns list the 'sequence ignore class ids' which are classes not penalized as
# FPs for this sequence.
seq = row[0]
self.seq_list.append(seq)
self.seq_lengths[seq] = int(row[1])
self.seq_sizes[seq] = (int(row[2]), int(row[3]))
self.seq_ignore_class_ids[seq] = [int(row[x]) for x in range(4, len(row))]
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the unified RobMOTS format.
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# import to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
time_key = str(t)
# list to collect all masks of a timestep to check for overlapping areas (for segmentation datasets)
all_valid_masks = []
if time_key in read_data.keys():
try:
raw_data['ids'][t] = np.atleast_1d([det[1] for det in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([det[2] for det in read_data[time_key]]).astype(int)
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
raw_data['dets'][t] = [{'size': [int(region[4]), int(region[5])],
'counts': region[6].encode(encoding='UTF-8')}
for region in read_data[time_key]]
all_valid_masks += [mask for mask, cls in zip(raw_data['dets'][t], raw_data['classes'][t]) if
cls < 100]
else:
raw_data['dets'][t] = np.atleast_2d([det[4:8] for det in read_data[time_key]]).astype(float)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([det[3] for det
in read_data[time_key]]).astype(float)
except IndexError:
self._raise_index_error(is_gt, self.sub_benchmark, seq)
except ValueError:
self._raise_value_error(is_gt, self.sub_benchmark, seq)
# no detection in this timestep
else:
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
raw_data['dets'][t] = []
else:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0).astype(float)
# check for overlapping masks
if all_valid_masks:
masks_merged = all_valid_masks[0]
for mask in all_valid_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
err = 'Overlapping masks in frame %d' % t
raise TrackEvalException(err)
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['frame_size'] = self.seq_sizes[seq]
raw_data['seq'] = seq
return raw_data
@staticmethod
def _raise_index_error(is_gt, sub_benchmark, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param sub_benchmark: the name of the sub-benchmark
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from benchmark %s, sequence %s, because there are not enough ' \
'columns in the data.' % (sub_benchmark, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, sub_benchmark, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param sub_benchmark: the name of the sub-benchmark
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from benchmark %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (sub_benchmark, seq))
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
Preprocessing (preproc) occurs in 3 steps.
1) Extract only detections relevant for the class to be evaluated.
2) Match gt dets and tracker dets. Tracker dets that are matched to a gt det (TPs) are marked as not to be
removed.
3) Remove unmatched tracker dets if they fall within an ignore region or are too small, or if that class
is marked as an ignore class for that sequence.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
Note that there is a special 'all' class, which evaluates all of the COCO classes together in a
'class agnostic' fashion.
"""
# import to reduce minimum requirements
from pycocotools import mask as mask_utils
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = self.class_name_to_class_id[cls]
ignore_class_id = cls_id+100
seq = raw_data['seq']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class
if cls == 'all':
gt_class_mask = raw_data['gt_classes'][t] < 100
# For waymo, combine predictions for [car, truck, bus, motorcycle] into car, because they are all annotated
# together as one 'vehicle' class.
elif self.sub_benchmark == 'waymo' and cls == 'car':
waymo_vehicle_classes = np.array([3, 4, 6, 8])
gt_class_mask = np.isin(raw_data['gt_classes'][t], waymo_vehicle_classes)
else:
gt_class_mask = raw_data['gt_classes'][t] == cls_id
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
if cls == 'all':
ignore_regions_mask = raw_data['gt_classes'][t] >= 100
else:
ignore_regions_mask = raw_data['gt_classes'][t] == ignore_class_id
ignore_regions_mask = np.logical_or(ignore_regions_mask, raw_data['gt_classes'][t] == 100)
if self.sub_benchmark in self.box_gt_benchmarks:
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
ignore_regions_box = raw_data['gt_dets'][t][ignore_regions_mask]
if len(ignore_regions_box) > 0:
ignore_regions_box[:, 2] = ignore_regions_box[:, 2] - ignore_regions_box[:, 0]
ignore_regions_box[:, 3] = ignore_regions_box[:, 3] - ignore_regions_box[:, 1]
ignore_regions = mask_utils.frPyObjects(ignore_regions_box, self.seq_sizes[seq][0], self.seq_sizes[seq][1])
else:
ignore_regions = []
else:
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
ignore_regions = [raw_data['gt_dets'][t][ind] for ind in range(len(ignore_regions_mask)) if
ignore_regions_mask[ind]]
if cls == 'all':
tracker_class_mask = np.ones_like(raw_data['tracker_classes'][t])
else:
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
tracker_classes = raw_data['tracker_classes'][t][tracker_class_mask]
# Only do preproc if there are ignore regions defined to remove
if tracker_ids.shape[0] > 0:
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
# match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets remove those that are greater than 50% within an ignore region.
# unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
# crowd_ignore_regions = raw_data['gt_ignore_regions'][t]
# intersection_with_ignore_region = self. \
# _calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions, box_format='x0y0x1y1',
# do_ioa=True)
if cls_id in self.seq_ignore_class_ids[seq]:
# Remove unmatched detections for classes that are marked as 'ignore' for the whole sequence.
to_remove_tracker = unmatched_indices
else:
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if
i in unmatched_indices]
# For unmatched tracker dets remove those that are too small.
tracker_boxes_t = mask_utils.toBbox(unmatched_tracker_dets)
unmatched_widths = tracker_boxes_t[:, 2]
unmatched_heights = tracker_boxes_t[:, 3]
unmatched_size = np.maximum(unmatched_heights, unmatched_widths)
min_size = np.min(self.seq_sizes[seq])/8
is_too_small = unmatched_size <= min_size + np.finfo('float').eps
# For unmatched tracker dets remove those that are greater than 50% within an ignore region.
if ignore_regions:
ignore_region_merged = ignore_regions[0]
for mask in ignore_regions[1:]:
ignore_region_merged = mask_utils.merge([ignore_region_merged, mask], intersect=False)
intersection_with_ignore_region = self. \
_calculate_mask_ious(unmatched_tracker_dets, [ignore_region_merged], is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
to_remove_tracker = unmatched_indices[np.logical_or(is_too_small, is_within_ignore_region)]
else:
to_remove_tracker = unmatched_indices[is_too_small]
# For the special 'all' class, you need to remove unmatched detections from all ignore classes and
# non-evaluated classes.
if cls == 'all':
unmatched_tracker_classes = [tracker_classes[i] for i in range(len(tracker_classes)) if
i in unmatched_indices]
is_ignore_class = np.isin(unmatched_tracker_classes, self.seq_ignore_class_ids[seq])
is_not_evaled_class = np.logical_not(np.isin(unmatched_tracker_classes, self.valid_class_ids))
to_remove_all = unmatched_indices[np.logical_or(is_ignore_class, is_not_evaled_class)]
to_remove_tracker = np.concatenate([to_remove_tracker, to_remove_all], axis=0)
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
data['frame_size'] = raw_data['frame_size']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
if self.sub_benchmark in self.box_gt_benchmarks:
# Convert tracker masks to bboxes (for benchmarks with only bbox ground-truth),
# and then convert to x0y0x1y1 format.
tracker_boxes_t = mask_utils.toBbox(tracker_dets_t)
tracker_boxes_t[:, 2] = tracker_boxes_t[:, 0] + tracker_boxes_t[:, 2]
tracker_boxes_t[:, 3] = tracker_boxes_t[:, 1] + tracker_boxes_t[:, 3]
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_boxes_t, box_format='x0y0x1y1')
else:
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
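# --- Illustrative sketch, not part of the repository code ---
# Quick reference for the seqmap format consumed by _get_seq_info above: one row per sequence,
# giving name, length, height, width, then any per-sequence ignore class ids. The row values
# below are made-up example data.
row = ['seq_0001', '154', '375', '1242', '3', '8']
seq = row[0]
seq_length = int(row[1])
seq_size = (int(row[2]), int(row[3]))         # (height, width), as stored in self.seq_sizes
ignore_class_ids = [int(x) for x in row[4:]]  # classes not penalised as FPs for this sequence
print(seq, seq_length, seq_size, ignore_class_ids)  # -> seq_0001 154 (375, 1242) [3, 8]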

View File

@@ -0,0 +1,81 @@
cls_id_to_name = {
1: 'person',
2: 'bicycle',
3: 'car',
4: 'motorcycle',
5: 'airplane',
6: 'bus',
7: 'train',
8: 'truck',
9: 'boat',
10: 'traffic light',
11: 'fire hydrant',
12: 'stop sign',
13: 'parking meter',
14: 'bench',
15: 'bird',
16: 'cat',
17: 'dog',
18: 'horse',
19: 'sheep',
20: 'cow',
21: 'elephant',
22: 'bear',
23: 'zebra',
24: 'giraffe',
25: 'backpack',
26: 'umbrella',
27: 'handbag',
28: 'tie',
29: 'suitcase',
30: 'frisbee',
31: 'skis',
32: 'snowboard',
33: 'sports ball',
34: 'kite',
35: 'baseball bat',
36: 'baseball glove',
37: 'skateboard',
38: 'surfboard',
39: 'tennis racket',
40: 'bottle',
41: 'wine glass',
42: 'cup',
43: 'fork',
44: 'knife',
45: 'spoon',
46: 'bowl',
47: 'banana',
48: 'apple',
49: 'sandwich',
50: 'orange',
51: 'broccoli',
52: 'carrot',
53: 'hot dog',
54: 'pizza',
55: 'donut',
56: 'cake',
57: 'chair',
58: 'couch',
59: 'potted plant',
60: 'bed',
61: 'dining table',
62: 'toilet',
63: 'tv',
64: 'laptop',
65: 'mouse',
66: 'remote',
67: 'keyboard',
68: 'cell phone',
69: 'microwave',
70: 'oven',
71: 'toaster',
72: 'sink',
73: 'refrigerator',
74: 'book',
75: 'clock',
76: 'vase',
77: 'scissors',
78: 'teddy bear',
79: 'hair drier',
80: 'toothbrush'}
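# --- Illustrative sketch, not part of the repository code ---
# The RobMOTS dataset class inverts this id-to-name map and adds the special class-agnostic
# 'all' entry (id -1); a minimal sketch of that inversion using the dictionary defined above.
class_name_to_class_id = {cls_name: cls_id for cls_id, cls_name in cls_id_to_name.items()}
class_name_to_class_id['all'] = -1  # special class-agnostic entry
print(class_name_to_class_id['person'], class_name_to_class_id['all'])  # -> 1 -1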

View File

@@ -0,0 +1,113 @@
# python3 scripts\run_rob_mots.py --ROBMOTS_SPLIT val --TRACKERS_TO_EVAL tracker_name (e.g. STP) --USE_PARALLEL True --NUM_PARALLEL_CORES 4
import sys
import os
import csv
import numpy as np
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import trackeval # noqa: E402
from trackeval import utils
code_path = utils.get_code_path()
if __name__ == '__main__':
freeze_support()
script_config = {
'ROBMOTS_SPLIT': 'train', # 'train', # valid: 'train', 'val', 'test', 'test_live', 'test_post', 'test_all'
'BENCHMARKS': ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao'], # 'bdd_mots' coming soon
'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'),
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'),
}
default_eval_config = trackeval.Evaluator.get_default_eval_config()
default_eval_config['PRINT_ONLY_COMBINED'] = True
default_eval_config['DISPLAY_LESS_PROGRESS'] = True
default_dataset_config = trackeval.datasets.RobMOTS.get_default_dataset_config()
config = {**default_eval_config, **default_dataset_config, **script_config}
# Command line interface:
config = utils.update_config(config)
if config['ROBMOTS_SPLIT'] == 'val':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
'tao', 'mots_challenge']
config['SPLIT_TO_EVAL'] = 'val'
elif config['ROBMOTS_SPLIT'] == 'test' or config['ROBMOTS_SPLIT'] == 'test_live':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'test_post':
config['BENCHMARKS'] = ['mots_challenge', 'waymo']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'test_all':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
'tao', 'mots_challenge', 'waymo']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'train':
config['BENCHMARKS'] = ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao'] # 'bdd_mots' coming soon
config['SPLIT_TO_EVAL'] = 'train'
metrics_config = {'METRICS': ['HOTA']}
# metrics_config = {'METRICS': ['HOTA', 'CLEAR', 'Identity']}
eval_config = {k: v for k, v in config.items() if k in config.keys()}
dataset_config = {k: v for k, v in config.items() if k in config.keys()}
# Run code
dataset_list = []
for bench in config['BENCHMARKS']:
dataset_config['SUB_BENCHMARK'] = bench
dataset_list.append(trackeval.datasets.RobMOTS(dataset_config))
evaluator = trackeval.Evaluator(eval_config)
metrics_list = []
for metric in [trackeval.metrics.HOTA, trackeval.metrics.CLEAR, trackeval.metrics.Identity]:
if metric.get_name() in metrics_config['METRICS']:
metrics_list.append(metric())
if len(metrics_list) == 0:
raise Exception('No metrics selected for evaluation')
output_res, output_msg = evaluator.evaluate(dataset_list, metrics_list)
# For each benchmark, combine the 'all' score with the 'cls_averaged' using geometric mean.
metrics_to_calc = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA']
trackers = list(output_res['RobMOTS.' + config['BENCHMARKS'][0]].keys())
for tracker in trackers:
# final_results[benchmark][result_type][metric]
final_results = {}
res = {bench: output_res['RobMOTS.' + bench][tracker]['COMBINED_SEQ'] for bench in config['BENCHMARKS']}
for bench in config['BENCHMARKS']:
final_results[bench] = {'cls_av': {}, 'det_av': {}, 'final': {}}
for metric in metrics_to_calc:
final_results[bench]['cls_av'][metric] = np.mean(res[bench]['cls_comb_cls_av']['HOTA'][metric])
final_results[bench]['det_av'][metric] = np.mean(res[bench]['all']['HOTA'][metric])
final_results[bench]['final'][metric] = \
np.sqrt(final_results[bench]['cls_av'][metric] * final_results[bench]['det_av'][metric])
# Take the arithmetic mean over all the benchmarks
final_results['overall'] = {'cls_av': {}, 'det_av': {}, 'final': {}}
for metric in metrics_to_calc:
final_results['overall']['cls_av'][metric] = \
np.mean([final_results[bench]['cls_av'][metric] for bench in config['BENCHMARKS']])
final_results['overall']['det_av'][metric] = \
np.mean([final_results[bench]['det_av'][metric] for bench in config['BENCHMARKS']])
final_results['overall']['final'][metric] = \
np.mean([final_results[bench]['final'][metric] for bench in config['BENCHMARKS']])
# Save out result
headers = [config['SPLIT_TO_EVAL']] + [x + '___' + metric for x in ['f', 'c', 'd'] for metric in metrics_to_calc]
def rowify(d):
return [d[x][metric] for x in ['final', 'cls_av', 'det_av'] for metric in metrics_to_calc]
out_file = os.path.join(script_config['TRACKERS_FOLDER'], script_config['ROBMOTS_SPLIT'], tracker,
'final_results.csv')
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f, delimiter=',')
writer.writerow(headers)
writer.writerow(['overall'] + rowify(final_results['overall']))
for bench in config['BENCHMARKS']:
if bench == 'overall':
continue
writer.writerow([bench] + rowify(final_results[bench]))
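# --- Illustrative sketch, not part of the repository script ---
# The score combination above: per benchmark the final value is the geometric mean of the
# class-averaged and class-agnostic ('all') results, and the overall score is the arithmetic
# mean over benchmarks. The HOTA values below are invented example numbers.
import numpy as np
cls_av = {'kitti_mots': 0.52, 'tao': 0.31}   # hypothetical class-averaged HOTA per benchmark
det_av = {'kitti_mots': 0.68, 'tao': 0.45}   # hypothetical 'all'-class HOTA per benchmark
final = {b: np.sqrt(cls_av[b] * det_av[b]) for b in cls_av}   # geometric mean per benchmark
overall = float(np.mean(list(final.values())))                # arithmetic mean over benchmarks
print(final, overall)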

View File

@@ -0,0 +1,566 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class TAO(_BaseDataset):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
self.class_list = [cls for cls in self.valid_classes]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = json.load(f)
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information, these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
+ self.seq_to_classes[seq_id]['neg_cat_ids']
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
if cls in classes_to_consider else [] for cls in all_classes}
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = 'bbox'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = ann["bbox"][2] * ann["bbox"][3]
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann['category_id'],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute average area
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled inplace
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
print('Found track IDs shared across videos; re-assigning them to be unique.')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
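# A minimal sketch of how the 'data' dict produced by get_preprocessed_seq_data()
# above is typically consumed downstream: per timestep, gt and tracker detections
# are paired via the precomputed similarity_scores. This is an illustrative toy
# consumer, not one of the TrackEval metrics.
def count_matches_at_05(data):
    """Count gt/tracker pairs with IoU >= 0.5 over all timesteps of one sequence."""
    from scipy.optimize import linear_sum_assignment
    import numpy as np
    total = 0
    for t in range(data['num_timesteps']):
        sim = data['similarity_scores'][t]
        if sim is None or sim.size == 0:
            continue
        rows, cols = linear_sum_assignment(-sim)          # Hungarian matching on IoU
        total += int(np.sum(sim[rows, cols] >= 0.5))
    return total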

View File

@@ -0,0 +1,652 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class TAO_OW(_BaseDataset):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'SUBSET': 'all'
}
return default_config
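# A hedged example of overriding a few of the defaults above; keys left out keep
# their default values via utils.init_config() in __init__ below. The paths and
# the tracker name are placeholders, not locations used anywhere in this repo.
custom_ow_config = {
    'GT_FOLDER': '/path/to/tao_ow/annotations',     # must contain exactly one gt .json file
    'TRACKERS_FOLDER': '/path/to/tracker/results',
    'TRACKERS_TO_EVAL': ['my_tracker'],             # hypothetical tracker folder name
    'MAX_DETECTIONS': 100,                          # 0 would mean unlimited
    'SUBSET': 'known',                              # 'all', 'known', 'unknown' or 'distractor'
}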
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
self.subset = self.config['SUBSET']
if self.subset != 'all':
# Split GT data into `known`, `unknown` or `distractor`
self._split_known_unknown_distractor()
self.gt_data = self._filter_gt_data(self.gt_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
# cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
# self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
# for cls in self.config['CLASSES_TO_EVAL']]
self.class_list = ["object"] # class-agnostic
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
# self.class_list = [cls for cls in self.valid_classes]
self.class_list = ["object"] # class-agnostic
# self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
self.class_name_to_class_id = {"object": 1} # class-agnostic
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = json.load(f)
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with image IDs as
keys and corresponding bounding boxes as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with image IDs as
keys and corresponding bounding boxes as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int) # class-agnostic
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
# all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
all_classes = [1] # class-agnostic
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
# classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
# + self.seq_to_classes[seq_id]['neg_cat_ids']
classes_to_consider = all_classes # class-agnostic
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
# classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
# if cls in classes_to_consider else [] for cls in all_classes}
classes_to_tracks = {cls: [track for track in all_tracks]
if cls in classes_to_consider else [] for cls in all_classes} # class-agnostic
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = 'bbox'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = ann["bbox"][2] * ann["bbox"][3]
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann['category_id'],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute average area
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled inplace
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
print('Found track IDs shared across videos; re-assigning them to be unique.')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
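# A small, hedged illustration of the helper above: track_id 7 appears in two
# different videos, so it is flagged and every (track_id, video_id) pair using it
# is given a fresh id counted up from max_track_id + 1. Toy data only.
_toy_anns = [
    {'track_id': 7, 'video_id': 1},
    {'track_id': 7, 'video_id': 1},
    {'track_id': 7, 'video_id': 2},   # same id in a different video -> conflict
]
# TAO_OW._make_track_ids_unique(_toy_anns) would return 1 (one conflicting id)
# and rewrite the ids in place, here to 8, 8 and 9 for the three annotations.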
def _split_known_unknown_distractor(self):
all_ids = set([i for i in range(1, 2000)]) # 2000 is larger than the max category id in TAO-OW.
# `knowns` includes 78 TAO_category_ids that corresponds to 78 COCO classes.
# (The other 2 COCO classes do not have corresponding classes in TAO).
self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
# `distractors` is defined as in the paper "Opening up Open-World Tracking"
self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
def _filter_gt_data(self, raw_gt_data):
"""
Filter out irrelevant data in the raw_gt_data
Args:
raw_gt_data: directly loaded from json.
Returns:
filtered gt_data
"""
valid_cat_ids = list()
if self.subset == "known":
valid_cat_ids = self.knowns
elif self.subset == "distractor":
valid_cat_ids = self.distractors
elif self.subset == "unknown":
valid_cat_ids = self.unknowns
# elif self.subset == "test_only_unknowns":
# valid_cat_ids = test_only_unknowns
else:
raise Exception("The parameter `SUBSET` is incorrect")
filtered = dict()
filtered["videos"] = raw_gt_data["videos"]
# filtered["videos"] = list()
unwanted_vid = set()
# for video in raw_gt_data["videos"]:
# datasrc = video["name"].split('/')[1]
# if datasrc in data_srcs:
# filtered["videos"].append(video)
# else:
# unwanted_vid.add(video["id"])
filtered["annotations"] = list()
for ann in raw_gt_data["annotations"]:
if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
filtered["annotations"].append(ann)
filtered["tracks"] = list()
for track in raw_gt_data["tracks"]:
if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
filtered["tracks"].append(track)
filtered["images"] = list()
for image in raw_gt_data["images"]:
if image["video_id"] not in unwanted_vid:
filtered["images"].append(image)
filtered["categories"] = list()
for cat in raw_gt_data["categories"]:
if cat["id"] in valid_cat_ids:
filtered["categories"].append(cat)
filtered["info"] = raw_gt_data["info"]
filtered["licenses"] = raw_gt_data["licenses"]
return filtered
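# A hedged usage sketch for the class above, assuming the upstream TrackEval
# entry points (trackeval.Evaluator, trackeval.metrics.HOTA) are importable from
# wherever this module is vendored; paths and the tracker name are placeholders.
# Constructing the dataset reads GT_FOLDER from disk, so the calls stay commented.
# import trackeval
# ow_config = TAO_OW.get_default_dataset_config()
# ow_config['SUBSET'] = 'unknown'                 # evaluate only the unknown-category split
# ow_config['TRACKERS_TO_EVAL'] = ['my_tracker']  # hypothetical tracker folder name
# evaluator = trackeval.Evaluator(trackeval.Evaluator.get_default_eval_config())
# evaluator.evaluate([TAO_OW(ow_config)], [trackeval.metrics.HOTA()])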

View File

@@ -0,0 +1,438 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class VisDrone2DBox(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'], # Valid: the ten VisDrone classes listed here
'BENCHMARK': 'MOT17', # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
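# A hedged example of overriding the defaults above for a VisDrone-style layout
# where gt files live directly under GT_FOLDER as <seq>.txt. All paths and the
# sequence name are placeholders, not values taken from this repo.
visdrone_cfg = {
    'GT_FOLDER': '/path/to/visdrone/annotations',
    'TRACKERS_FOLDER': '/path/to/tracker/results',
    'SKIP_SPLIT_FOL': True,                    # no BENCHMARK-SPLIT sub-folder in the paths
    'SEQ_INFO': {'uav_sequence_001': 100},     # seq name -> frame count (None = read seqinfo.ini)
    'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt',
}
# dataset = VisDrone2DBox(visdrone_cfg)   # requires the gt/tracker files to exist on disk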
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only the following classes are valid: ' + ', '.join(self.valid_classes))
self.class_name_to_class_id = {'ignored': 0, 'pedestrian': 1, 'people': 2, 'bicycle': 3, 'car': 4, 'van': 5,
'truck': 6, 'tricycle': 7, 'awning-tricycle': 8, 'bus': 9,
'motor': 10, 'other': 11}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If sequence length is None, try to read it from the seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
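# A hedged illustration of the fallback path above: when SEQ_INFO is not given,
# _get_seq_info() reads a seqmap text file (GT_FOLDER/seqmaps/<BENCHMARK>-<SPLIT>.txt
# by default). Its first line is treated as a header and every following non-empty
# line names one sequence, which must then provide a seqinfo.ini with its length.
# The sequence names below are placeholders.
example_seqmap_contents = "name\nuav_sequence_001\nuav_sequence_002\n"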
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
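# A hedged illustration of the column layout parsed above (indices as used by
# the code): 0 frame, 1 id, 2-5 bbox (x, y, w, h), 6 confidence for tracker
# files / zero_marked flag for gt files, 7 class id (if present). The values
# themselves are made up, not taken from any sequence in this repo.
example_gt_line = '1,3,794,247,71,174,1,4,1,0'     # gt: valid box of class 4 (car)
example_trk_line = '1,3,794,247,71,174,0.92,4'     # tracker: same box, confidence 0.92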
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
VisDrone (MOT Challenge file format):
In this adaptation the 4 preproc steps are as follows:
1) Each of the ten VisDrone classes is evaluated separately, but all classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class); those matching with distractor
objects (the 'ignored' and 'other' classes) are removed.
3) There are no crowd ignore regions.
4) All gt dets not belonging to the evaluated class are removed, as are gt dets marked as zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
# distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
distractor_class_names = ['ignored', 'other']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is only valid for the class ids defined for this dataset
if len(tracker_classes) > 0 and np.max(tracker_classes) > np.max(self.valid_class_numbers):
raise TrackEvalException(
'Invalid class id (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in the
# evaluated class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
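# A hedged, self-contained sketch of what the 'xywh' box IoU used above computes
# for a single pair of boxes; the actual vectorised implementation is inherited
# from the base dataset class and is not reproduced here.
def xywh_iou(box_a, box_b):
    """IoU of two [x, y, w, h] boxes given as top-left corner plus size."""
    ax1, ay1, aw, ah = box_a
    bx1, by1, bw, bh = box_b
    inter_w = max(0.0, min(ax1 + aw, bx1 + bw) - max(ax1, bx1))
    inter_h = max(0.0, min(ay1 + ah, by1 + bh) - max(ay1, by1))
    inter = inter_w * inter_h
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0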

View File

@@ -0,0 +1,364 @@
import os
import numpy as np
import json
from ._base_dataset import _BaseDataset
from ..utils import TrackEvalException
from .. import utils
from .. import _timing
class YouTubeVIS(_BaseDataset):
"""Dataset class for YouTubeVIS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/youtube_vis/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/youtube_vis/'),
# Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'train_sub_split', # Valid: 'train', 'val', 'train_sub_split'
'PRINT_CONFIG': True, # Whether to print current config
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
self.tracker_fol = self.config['TRACKERS_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
self.use_super_categories = False
self.should_classes_combine = True
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
if not os.path.exists(self.gt_fol):
print("GT folder not found: " + self.gt_fol)
raise TrackEvalException("GT folder not found: " + os.path.basename(self.gt_fol))
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
# Get classes to eval
self.valid_classes = [cls['name'] for cls in self.gt_data['categories']]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) + ' are valid.')
else:
self.class_list = [cls['name'] for cls in self.gt_data['categories']]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get sequences to eval and check gt files exist
self.seq_list = [vid['file_names'][0].split('/')[0] for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['file_names'][0].split('/')[0]: vid['id'] for vid in self.gt_data['videos']}
self.seq_lengths = {vid['id']: len(vid['file_names']) for vid in self.gt_data['videos']}
# encode masks and compute track areas
self._prepare_gt_annotations()
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
# counter for globally unique track IDs
self.global_tid_counter = 0
self.tracker_data = dict()
for tracker in self.tracker_list:
tracker_dir_path = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
tr_dir_files = [file for file in os.listdir(tracker_dir_path) if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(tracker_dir_path + ' does not contain exactly one json file.')
with open(os.path.join(tracker_dir_path, tr_dir_files[0])) as f:
curr_data = json.load(f)
self.tracker_data[tracker] = curr_data
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the YouTubeVIS format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_iscrowd]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas]: dictionary with class values as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
# select sequence tracks
seq_id = self.seq_name_to_seq_id[seq]
if is_gt:
tracks = [ann for ann in self.gt_data['annotations'] if ann['video_id'] == seq_id]
else:
tracks = self._get_tracker_seq_tracks(tracker, seq_id)
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
raw_data['dets'][t] = [track['segmentations'][t] for track in tracks if track['segmentations'][t]]
raw_data['ids'][t] = np.atleast_1d([track['id'] for track in tracks
if track['segmentations'][t]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([track['category_id'] for track in tracks
if track['segmentations'][t]]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([track['score'] for track in tracks
if track['segmentations'][t]]).astype(float)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_cls_ids = {self.class_name_to_class_id[cls] for cls in self.class_list}
classes_to_tracks = {cls: [track for track in tracks if track['category_id'] == cls] for cls in all_cls_ids}
# mapping from classes to track representations and track information
raw_data['classes_to_tracks'] = {cls: [{i: track['segmentations'][i]
for i in range(len(track['segmentations']))} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if is_gt:
raw_data['classes_to_gt_track_iscrowd'] = {cls: [track['iscrowd'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
else:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([track['score'] for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
YouTubeVIS:
In YouTubeVIS, the 4 preproc steps are as follows:
1) There are 40 classes which are evaluated separately.
2) No matched tracker dets are removed.
3) No unmatched tracker dets are removed.
4) No gt dets are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
data['tracker_ids'][t] = tracker_ids
data['tracker_dets'][t] = tracker_dets
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['gt_track_iscrowd'] = raw_data['classes_to_gt_track_iscrowd'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['iou_type'] = 'mask'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _prepare_gt_annotations(self):
"""
Prepares GT data by RLE-encoding segmentations and computing the average track area.
:return: None
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
for track in self.gt_data['annotations']:
h = track['height']
w = track['width']
for i, seg in enumerate(track['segmentations']):
if seg:
track['segmentations'][i] = mask_utils.frPyObjects(seg, h, w)
areas = [a for a in track['areas'] if a]
if len(areas) == 0:
track['area'] = 0
else:
track['area'] = np.array(areas).mean()
def _get_tracker_seq_tracks(self, tracker, seq_id):
"""
Prepares tracker data for a given sequence. Extracts all annotations for given sequence ID, computes
average track area and assigns a track ID.
:param tracker: the given tracker
:param seq_id: the sequence ID
:return: the extracted tracks
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
tracks = [ann for ann in self.tracker_data[tracker] if ann['video_id'] == seq_id]
for track in tracks:
track['areas'] = []
for seg in track['segmentations']:
if seg:
track['areas'].append(mask_utils.area(seg))
else:
track['areas'].append(None)
areas = [a for a in track['areas'] if a]
if len(areas) == 0:
track['area'] = 0
else:
track['area'] = np.array(areas).mean()
track['id'] = self.global_tid_counter
self.global_tid_counter += 1
return tracks
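# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# _get_tracker_seq_tracks() above reads 'video_id', 'category_id', 'score' and per-frame
# 'segmentations' from each tracker JSON entry; the layout below is an assumption derived
# from that code, with invented values and a truncated RLE string.
_example_tracker_entry = {
    'video_id': 1,                 # matched against seq_name_to_seq_id
    'category_id': 0,              # matched against class_name_to_class_id
    'score': 0.87,                 # per-track confidence, used for sorting dt tracks
    'segmentations': [None, {'size': [720, 1280], 'counts': '...'}],  # one RLE (or None) per frame
}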

View File

@@ -0,0 +1,225 @@
import time
import traceback
from multiprocessing.pool import Pool
from functools import partial
import os
from . import utils
from .utils import TrackEvalException
from . import _timing
from .metrics import Count
try:
import tqdm
TQDM_IMPORTED = True
except ImportError as _:
TQDM_IMPORTED = False
class Evaluator:
"""Evaluator class for evaluating different metrics for different datasets"""
@staticmethod
def get_default_eval_config():
"""Returns the default config values for evaluation"""
code_path = utils.get_code_path()
default_config = {
'USE_PARALLEL': False,
'NUM_PARALLEL_CORES': 8,
'BREAK_ON_ERROR': True, # Raises exception and exits with error
'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error
'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file.
'PRINT_RESULTS': True,
'PRINT_ONLY_COMBINED': False,
'PRINT_CONFIG': True,
'TIME_PROGRESS': True,
'DISPLAY_LESS_PROGRESS': True,
'OUTPUT_SUMMARY': True,
'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections
'OUTPUT_DETAILED': True,
'PLOT_CURVES': True,
}
return default_config
def __init__(self, config=None):
"""Initialise the evaluator with a config file"""
self.config = utils.init_config(config, self.get_default_eval_config(), 'Eval')
# Only run timing analysis if not run in parallel.
if self.config['TIME_PROGRESS'] and not self.config['USE_PARALLEL']:
_timing.DO_TIMING = True
if self.config['DISPLAY_LESS_PROGRESS']:
_timing.DISPLAY_LESS_PROGRESS = True
@_timing.time
def evaluate(self, dataset_list, metrics_list, show_progressbar=False):
"""Evaluate a set of metrics on a set of datasets"""
config = self.config
metrics_list = metrics_list + [Count()] # Count metrics are always run
metric_names = utils.validate_metrics_list(metrics_list)
dataset_names = [dataset.get_name() for dataset in dataset_list]
output_res = {}
output_msg = {}
for dataset, dataset_name in zip(dataset_list, dataset_names):
# Get dataset info about what to evaluate
output_res[dataset_name] = {}
output_msg[dataset_name] = {}
tracker_list, seq_list, class_list = dataset.get_eval_info()
print('\nEvaluating %i tracker(s) on %i sequence(s) for %i class(es) on %s dataset using the following '
'metrics: %s\n' % (len(tracker_list), len(seq_list), len(class_list), dataset_name,
', '.join(metric_names)))
# Evaluate each tracker
for tracker in tracker_list:
# if not config['BREAK_ON_ERROR'] then go to next tracker without breaking
try:
# Evaluate each sequence in parallel or in series.
# returns a nested dict (res), indexed like: res[seq][class][metric_name][sub_metric field]
# e.g. res[seq_0001][pedestrian][hota][DetA]
print('\nEvaluating %s\n' % tracker)
time_start = time.time()
if config['USE_PARALLEL']:
if show_progressbar and TQDM_IMPORTED:
seq_list_sorted = sorted(seq_list)
with Pool(config['NUM_PARALLEL_CORES']) as pool, tqdm.tqdm(total=len(seq_list)) as pbar:
_eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
class_list=class_list, metrics_list=metrics_list,
metric_names=metric_names)
results = []
for r in pool.imap(_eval_sequence, seq_list_sorted,
chunksize=20):
results.append(r)
pbar.update()
res = dict(zip(seq_list_sorted, results))
else:
with Pool(config['NUM_PARALLEL_CORES']) as pool:
_eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
class_list=class_list, metrics_list=metrics_list,
metric_names=metric_names)
results = pool.map(_eval_sequence, seq_list)
res = dict(zip(seq_list, results))
else:
res = {}
if show_progressbar and TQDM_IMPORTED:
seq_list_sorted = sorted(seq_list)
for curr_seq in tqdm.tqdm(seq_list_sorted):
res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
metric_names)
else:
for curr_seq in sorted(seq_list):
res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
metric_names)
# Combine results over all sequences and then over all classes
# collecting combined cls keys (cls averaged, det averaged, super classes)
combined_cls_keys = []
res['COMBINED_SEQ'] = {}
# combine sequences for each class
for c_cls in class_list:
res['COMBINED_SEQ'][c_cls] = {}
for metric, metric_name in zip(metrics_list, metric_names):
curr_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value in res.items() if
seq_key != 'COMBINED_SEQ'}
res['COMBINED_SEQ'][c_cls][metric_name] = metric.combine_sequences(curr_res)
# combine classes
if dataset.should_classes_combine:
combined_cls_keys += ['cls_comb_cls_av', 'cls_comb_det_av', 'all']
res['COMBINED_SEQ']['cls_comb_cls_av'] = {}
res['COMBINED_SEQ']['cls_comb_det_av'] = {}
for metric, metric_name in zip(metrics_list, metric_names):
cls_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
res['COMBINED_SEQ'].items() if cls_key not in combined_cls_keys}
res['COMBINED_SEQ']['cls_comb_cls_av'][metric_name] = \
metric.combine_classes_class_averaged(cls_res)
res['COMBINED_SEQ']['cls_comb_det_av'][metric_name] = \
metric.combine_classes_det_averaged(cls_res)
# combine classes to super classes
if dataset.use_super_categories:
for cat, sub_cats in dataset.super_categories.items():
combined_cls_keys.append(cat)
res['COMBINED_SEQ'][cat] = {}
for metric, metric_name in zip(metrics_list, metric_names):
cat_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
res['COMBINED_SEQ'].items() if cls_key in sub_cats}
res['COMBINED_SEQ'][cat][metric_name] = metric.combine_classes_det_averaged(cat_res)
# Print and output results in various formats
if config['TIME_PROGRESS']:
print('\nAll sequences for %s finished in %.2f seconds' % (tracker, time.time() - time_start))
output_fol = dataset.get_output_fol(tracker)
tracker_display_name = dataset.get_display_name(tracker)
for c_cls in res['COMBINED_SEQ'].keys(): # class_list + combined classes if calculated
summaries = []
details = []
num_dets = res['COMBINED_SEQ'][c_cls]['Count']['Dets']
if config['OUTPUT_EMPTY_CLASSES'] or num_dets > 0:
for metric, metric_name in zip(metrics_list, metric_names):
# for combined classes there is no per sequence evaluation
if c_cls in combined_cls_keys:
table_res = {'COMBINED_SEQ': res['COMBINED_SEQ'][c_cls][metric_name]}
else:
table_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value
in res.items()}
if config['PRINT_RESULTS'] and config['PRINT_ONLY_COMBINED']:
dont_print = dataset.should_classes_combine and c_cls not in combined_cls_keys
if not dont_print:
metric.print_table({'COMBINED_SEQ': table_res['COMBINED_SEQ']},
tracker_display_name, c_cls)
elif config['PRINT_RESULTS']:
metric.print_table(table_res, tracker_display_name, c_cls)
if config['OUTPUT_SUMMARY']:
summaries.append(metric.summary_results(table_res))
if config['OUTPUT_DETAILED']:
details.append(metric.detailed_results(table_res))
if config['PLOT_CURVES']:
metric.plot_single_tracker_results(table_res, tracker_display_name, c_cls,
output_fol)
if config['OUTPUT_SUMMARY']:
utils.write_summary_results(summaries, c_cls, output_fol)
if config['OUTPUT_DETAILED']:
utils.write_detailed_results(details, c_cls, output_fol)
# Output for returning from function
output_res[dataset_name][tracker] = res
output_msg[dataset_name][tracker] = 'Success'
except Exception as err:
output_res[dataset_name][tracker] = None
if type(err) == TrackEvalException:
output_msg[dataset_name][tracker] = str(err)
else:
output_msg[dataset_name][tracker] = 'Unknown error occurred.'
print('Tracker %s was unable to be evaluated.' % tracker)
print(err)
traceback.print_exc()
if config['LOG_ON_ERROR'] is not None:
with open(config['LOG_ON_ERROR'], 'a') as f:
print(dataset_name, file=f)
print(tracker, file=f)
print(traceback.format_exc(), file=f)
print('\n\n\n', file=f)
if config['BREAK_ON_ERROR']:
raise err
elif config['RETURN_ON_ERROR']:
return output_res, output_msg
return output_res, output_msg
@_timing.time
def eval_sequence(seq, dataset, tracker, class_list, metrics_list, metric_names):
"""Function for evaluating a single sequence"""
raw_data = dataset.get_raw_seq_data(tracker, seq)
seq_res = {}
for cls in class_list:
seq_res[cls] = {}
data = dataset.get_preprocessed_seq_data(raw_data, cls)
for metric, met_name in zip(metrics_list, metric_names):
seq_res[cls][met_name] = metric.eval_sequence(data)
return seq_res
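# --- Illustrative usage sketch (added for clarity; not part of the upstream file) ---
# A minimal driver for the Evaluator above. `my_dataset` is a placeholder for any dataset
# object exposing the interface used by evaluate() (e.g. the loader earlier in this commit);
# the metric classes are re-exported by the metrics package below.
def _example_run(my_dataset):
    from .metrics import HOTA, CLEAR, Identity
    config = Evaluator.get_default_eval_config()
    config['USE_PARALLEL'] = False
    evaluator = Evaluator(config)
    metrics_list = [HOTA(), CLEAR(), Identity()]
    # evaluate() returns (output_res, output_msg); output_res is nested as
    # output_res[dataset_name][tracker][seq][class][metric_name]
    return evaluator.evaluate([my_dataset], metrics_list)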

View File

@@ -0,0 +1,8 @@
from .hota import HOTA
from .clear import CLEAR
from .identity import Identity
from .count import Count
from .j_and_f import JAndF
from .track_map import TrackMAP
from .vace import VACE
from .ideucl import IDEucl

View File

@@ -0,0 +1,133 @@
import numpy as np
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException
class _BaseMetric(ABC):
@abstractmethod
def __init__(self):
self.plottable = False
self.integer_fields = []
self.float_fields = []
self.array_labels = []
self.integer_array_fields = []
self.float_array_fields = []
self.fields = []
self.summary_fields = []
self.registered = False
#####################################################################
# Abstract functions for subclasses to implement
@_timing.time
@abstractmethod
def eval_sequence(self, data):
...
@abstractmethod
def combine_sequences(self, all_res):
...
@abstractmethod
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
...
@abstractmethod
def combine_classes_det_averaged(self, all_res):
...
def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
"""Plot results of metrics, only valid for metrics with self.plottable"""
if self.plottable:
raise NotImplementedError('plot_results is not implemented for metric %s' % self.get_name())
else:
pass
#####################################################################
# Helper functions which are useful for all metrics:
@classmethod
def get_name(cls):
return cls.__name__
@staticmethod
def _combine_sum(all_res, field):
"""Combine sequence results via sum"""
return sum([all_res[k][field] for k in all_res.keys()])
@staticmethod
def _combine_weighted_av(all_res, field, comb_res, weight_field):
"""Combine sequence results via weighted average"""
return sum([all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]) / np.maximum(1.0, comb_res[
weight_field])
def print_table(self, table_res, tracker, cls):
"""Prints table of results for all sequences"""
print('')
metric_name = self.get_name()
self._row_print([metric_name + ': ' + tracker + '-' + cls] + self.summary_fields)
for seq, results in sorted(table_res.items()):
if seq == 'COMBINED_SEQ':
continue
summary_res = self._summary_row(results)
self._row_print([seq] + summary_res)
summary_res = self._summary_row(table_res['COMBINED_SEQ'])
self._row_print(['COMBINED'] + summary_res)
def _summary_row(self, results_):
vals = []
for h in self.summary_fields:
if h in self.float_array_fields:
vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
elif h in self.float_fields:
vals.append("{0:1.5g}".format(100 * float(results_[h])))
elif h in self.integer_fields:
vals.append("{0:d}".format(int(results_[h])))
else:
raise NotImplementedError("Summary function not implemented for this field type.")
return vals
@staticmethod
def _row_print(*argv):
"""Prints results in an evenly spaced rows, with more space in first row"""
if len(argv) == 1:
argv = argv[0]
to_print = '%-35s' % argv[0]
for v in argv[1:]:
to_print += '%-10s' % str(v)
print(to_print)
def summary_results(self, table_res):
"""Returns a simple summary of final results for a tracker"""
return dict(zip(self.summary_fields, self._summary_row(table_res['COMBINED_SEQ'])))
def detailed_results(self, table_res):
"""Returns detailed final results for a tracker"""
# Get detailed field information
detailed_fields = self.float_fields + self.integer_fields
for h in self.float_array_fields + self.integer_array_fields:
for alpha in [int(100*x) for x in self.array_labels]:
detailed_fields.append(h + '___' + str(alpha))
detailed_fields.append(h + '___AUC')
# Get detailed results
detailed_results = {}
for seq, res in table_res.items():
detailed_row = self._detailed_row(res)
if len(detailed_row) != len(detailed_fields):
raise TrackEvalException(
'Field names and data have different sizes (%i and %i)' % (len(detailed_row), len(detailed_fields)))
detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
return detailed_results
def _detailed_row(self, res):
detailed_row = []
for h in self.float_fields + self.integer_fields:
detailed_row.append(res[h])
for h in self.float_array_fields + self.integer_array_fields:
for i, alpha in enumerate([int(100 * x) for x in self.array_labels]):
detailed_row.append(res[h][i])
detailed_row.append(np.mean(res[h]))
return detailed_row
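# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# A minimal concrete metric showing what the abstract interface above requires; the field
# names are invented and the combination rules simply sum over sequences/classes.
class _ExampleDetCount(_BaseMetric):
    def __init__(self):
        super().__init__()
        self.integer_fields = ['ExDets', 'ExGTDets']
        self.fields = self.integer_fields
        self.summary_fields = self.fields

    @_timing.time
    def eval_sequence(self, data):
        # count detections in the preprocessed sequence data
        return {'ExDets': data['num_tracker_dets'], 'ExGTDets': data['num_gt_dets']}

    def combine_sequences(self, all_res):
        return {field: self._combine_sum(all_res, field) for field in self.fields}

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        return {field: self._combine_sum(all_res, field) for field in self.fields}

    def combine_classes_det_averaged(self, all_res):
        return {field: self._combine_sum(all_res, field) for field in self.fields}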

View File

@@ -0,0 +1,186 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils
class CLEAR(_BaseMetric):
"""Class which implements the CLEAR metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.5, # Similarity score threshold required for a TP match. Default 0.5.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
main_integer_fields = ['CLR_TP', 'CLR_FN', 'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag']
extra_integer_fields = ['CLR_Frames']
self.integer_fields = main_integer_fields + extra_integer_fields
main_float_fields = ['MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'sMOTA']
extra_float_fields = ['CLR_F1', 'FP_per_frame', 'MOTAL', 'MOTP_sum']
self.float_fields = main_float_fields + extra_float_fields
self.fields = self.float_fields + self.integer_fields
self.summed_fields = self.integer_fields + ['MOTP_sum']
self.summary_fields = main_float_fields + main_integer_fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates CLEAR metrics for one sequence"""
# Initialise results
res = {}
for field in self.fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['CLR_FN'] = data['num_gt_dets']
res['ML'] = data['num_gt_ids']
res['MLR'] = 1.0
return res
if data['num_gt_dets'] == 0:
res['CLR_FP'] = data['num_tracker_dets']
res['MLR'] = 1.0
return res
# Variables counting global association
num_gt_ids = data['num_gt_ids']
gt_id_count = np.zeros(num_gt_ids) # For MT/ML/PT
gt_matched_count = np.zeros(num_gt_ids) # For MT/ML/PT
gt_frag_count = np.zeros(num_gt_ids) # For Frag
# Note that IDSWs are counted based on the last time each gt_id was present (any number of frames previously),
# but are only used in matching to continue current tracks based on the gt_id in the single previous timestep.
prev_tracker_id = np.nan * np.zeros(num_gt_ids) # For scoring IDSW
prev_timestep_tracker_id = np.nan * np.zeros(num_gt_ids) # For matching IDSW
# Calculate scores for each timestep
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Deal with the case that there are no gt_det/tracker_det in a timestep.
if len(gt_ids_t) == 0:
res['CLR_FP'] += len(tracker_ids_t)
continue
if len(tracker_ids_t) == 0:
res['CLR_FN'] += len(gt_ids_t)
gt_id_count[gt_ids_t] += 1
continue
# Calc score matrix to first minimise IDSWs from previous frame, and then maximise MOTP secondarily
similarity = data['similarity_scores'][t]
score_mat = (tracker_ids_t[np.newaxis, :] == prev_timestep_tracker_id[gt_ids_t[:, np.newaxis]])
score_mat = 1000 * score_mat + similarity
score_mat[similarity < self.threshold - np.finfo('float').eps] = 0
# Hungarian algorithm to find best matches
match_rows, match_cols = linear_sum_assignment(-score_mat)
actually_matched_mask = score_mat[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
matched_gt_ids = gt_ids_t[match_rows]
matched_tracker_ids = tracker_ids_t[match_cols]
# Calc IDSW for MOTA
prev_matched_tracker_ids = prev_tracker_id[matched_gt_ids]
is_idsw = (np.logical_not(np.isnan(prev_matched_tracker_ids))) & (
np.not_equal(matched_tracker_ids, prev_matched_tracker_ids))
res['IDSW'] += np.sum(is_idsw)
# Update counters for MT/ML/PT/Frag and record for IDSW/Frag for next timestep
gt_id_count[gt_ids_t] += 1
gt_matched_count[matched_gt_ids] += 1
not_previously_tracked = np.isnan(prev_timestep_tracker_id)
prev_tracker_id[matched_gt_ids] = matched_tracker_ids
prev_timestep_tracker_id[:] = np.nan
prev_timestep_tracker_id[matched_gt_ids] = matched_tracker_ids
currently_tracked = np.logical_not(np.isnan(prev_timestep_tracker_id))
gt_frag_count += np.logical_and(not_previously_tracked, currently_tracked)
# Calculate and accumulate basic statistics
num_matches = len(matched_gt_ids)
res['CLR_TP'] += num_matches
res['CLR_FN'] += len(gt_ids_t) - num_matches
res['CLR_FP'] += len(tracker_ids_t) - num_matches
if num_matches > 0:
res['MOTP_sum'] += sum(similarity[match_rows, match_cols])
# Calculate MT/ML/PT/Frag/MOTP
tracked_ratio = gt_matched_count[gt_id_count > 0] / gt_id_count[gt_id_count > 0]
res['MT'] = np.sum(np.greater(tracked_ratio, 0.8))
res['PT'] = np.sum(np.greater_equal(tracked_ratio, 0.2)) - res['MT']
res['ML'] = num_gt_ids - res['MT'] - res['PT']
res['Frag'] = np.sum(np.subtract(gt_frag_count[gt_frag_count > 0], 1))
res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
res['CLR_Frames'] = data['num_timesteps']
# Calculate final CLEAR scores
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.summed_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.summed_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_fields:
if ignore_empty_classes:
res[field] = self._combine_sum(
{k: v for k, v in all_res.items() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0}, field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean(
[v[field] for v in all_res.values() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
num_gt_ids = res['MT'] + res['ML'] + res['PT']
res['MTR'] = res['MT'] / np.maximum(1.0, num_gt_ids)
res['MLR'] = res['ML'] / np.maximum(1.0, num_gt_ids)
res['PTR'] = res['PT'] / np.maximum(1.0, num_gt_ids)
res['CLR_Re'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['CLR_Pr'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FP'])
res['MODA'] = (res['CLR_TP'] - res['CLR_FP']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['MOTA'] = (res['CLR_TP'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
res['sMOTA'] = (res['MOTP_sum'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['CLR_F1'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + 0.5*res['CLR_FN'] + 0.5*res['CLR_FP'])
res['FP_per_frame'] = res['CLR_FP'] / np.maximum(1.0, res['CLR_Frames'])
safe_log_idsw = np.log10(res['IDSW']) if res['IDSW'] > 0 else res['IDSW']
res['MOTAL'] = (res['CLR_TP'] - res['CLR_FP'] - safe_log_idsw) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
return res
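# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# _compute_final_fields() only needs the summed sub-metric counts; the numbers are invented.
_clear_example = CLEAR._compute_final_fields({'CLR_TP': 80, 'CLR_FN': 20, 'CLR_FP': 10, 'IDSW': 5,
                                               'MT': 6, 'PT': 3, 'ML': 1, 'Frag': 4,
                                               'MOTP_sum': 64.0, 'CLR_Frames': 50})
# e.g. MOTA = (80 - 10 - 5) / 100 = 0.65, MOTP = 64 / 80 = 0.8, CLR_Re = 0.8, CLR_Pr = 80 / 90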

View File

@@ -0,0 +1,44 @@
from ._base_metric import _BaseMetric
from .. import _timing
class Count(_BaseMetric):
"""Class which simply counts the number of tracker and gt detections and ids."""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['Dets', 'GT_Dets', 'IDs', 'GT_IDs']
self.fields = self.integer_fields
self.summary_fields = self.fields
@_timing.time
def eval_sequence(self, data):
"""Returns counts for one sequence"""
# Get results
res = {'Dets': data['num_tracker_dets'],
'GT_Dets': data['num_gt_dets'],
'IDs': data['num_tracker_ids'],
'GT_IDs': data['num_gt_ids'],
'Frames': data['num_timesteps']}
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=None):
"""Combines metrics across all classes by averaging over the class values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res

View File

@@ -0,0 +1,203 @@
import os
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
class HOTA(_BaseMetric):
"""Class which implements the HOTA metrics.
See: https://link.springer.com/article/10.1007/s11263-020-01375-2
"""
def __init__(self, config=None):
super().__init__()
self.plottable = True
self.array_labels = np.arange(0.05, 0.99, 0.05)
self.integer_array_fields = ['HOTA_TP', 'HOTA_FN', 'HOTA_FP']
self.float_array_fields = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA']
self.float_fields = ['HOTA(0)', 'LocA(0)', 'HOTALocA(0)']
self.fields = self.float_array_fields + self.integer_array_fields + self.float_fields
self.summary_fields = self.float_array_fields + self.float_fields
@_timing.time
def eval_sequence(self, data):
"""Calculates the HOTA metrics for one sequence"""
# Initialise results
res = {}
for field in self.float_array_fields + self.integer_array_fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
for field in self.float_fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['HOTA_FN'] = data['num_gt_dets'] * np.ones((len(self.array_labels)), dtype=float)
res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
res['LocA(0)'] = 1.0
return res
if data['num_gt_dets'] == 0:
res['HOTA_FP'] = data['num_tracker_dets'] * np.ones((len(self.array_labels)), dtype=float)
res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
res['LocA(0)'] = 1.0
return res
# Variables counting global association
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros((data['num_gt_ids'], 1))
tracker_id_count = np.zeros((1, data['num_tracker_ids']))
# First loop through each timestep and accumulate global track information.
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the potential matches between ids in each timestep
# These are normalised, weighted by the match similarity.
similarity = data['similarity_scores'][t]
sim_iou_denom = similarity.sum(0)[np.newaxis, :] + similarity.sum(1)[:, np.newaxis] - similarity
sim_iou = np.zeros_like(similarity)
sim_iou_mask = sim_iou_denom > 0 + np.finfo('float').eps
sim_iou[sim_iou_mask] = similarity[sim_iou_mask] / sim_iou_denom[sim_iou_mask]
potential_matches_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += sim_iou
# Calculate the total number of dets for each gt_id and tracker_id.
gt_id_count[gt_ids_t] += 1
tracker_id_count[0, tracker_ids_t] += 1
# Calculate overall jaccard alignment score (before unique matching) between IDs
global_alignment_score = potential_matches_count / (gt_id_count + tracker_id_count - potential_matches_count)
matches_counts = [np.zeros_like(potential_matches_count) for _ in self.array_labels]
# Calculate scores for each timestep
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Deal with the case that there are no gt_det/tracker_det in a timestep.
if len(gt_ids_t) == 0:
for a, alpha in enumerate(self.array_labels):
res['HOTA_FP'][a] += len(tracker_ids_t)
continue
if len(tracker_ids_t) == 0:
for a, alpha in enumerate(self.array_labels):
res['HOTA_FN'][a] += len(gt_ids_t)
continue
# Get matching scores between pairs of dets for optimizing HOTA
similarity = data['similarity_scores'][t]
score_mat = global_alignment_score[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] * similarity
# Hungarian algorithm to find best matches
match_rows, match_cols = linear_sum_assignment(-score_mat)
# Calculate and accumulate basic statistics
for a, alpha in enumerate(self.array_labels):
actually_matched_mask = similarity[match_rows, match_cols] >= alpha - np.finfo('float').eps
alpha_match_rows = match_rows[actually_matched_mask]
alpha_match_cols = match_cols[actually_matched_mask]
num_matches = len(alpha_match_rows)
res['HOTA_TP'][a] += num_matches
res['HOTA_FN'][a] += len(gt_ids_t) - num_matches
res['HOTA_FP'][a] += len(tracker_ids_t) - num_matches
if num_matches > 0:
res['LocA'][a] += sum(similarity[alpha_match_rows, alpha_match_cols])
matches_counts[a][gt_ids_t[alpha_match_rows], tracker_ids_t[alpha_match_cols]] += 1
# Calculate association scores (AssA, AssRe, AssPr) for the alpha value.
# First calculate scores per gt_id/tracker_id combo and then average over the number of detections.
for a, alpha in enumerate(self.array_labels):
matches_count = matches_counts[a]
ass_a = matches_count / np.maximum(1, gt_id_count + tracker_id_count - matches_count)
res['AssA'][a] = np.sum(matches_count * ass_a) / np.maximum(1, res['HOTA_TP'][a])
ass_re = matches_count / np.maximum(1, gt_id_count)
res['AssRe'][a] = np.sum(matches_count * ass_re) / np.maximum(1, res['HOTA_TP'][a])
ass_pr = matches_count / np.maximum(1, tracker_id_count)
res['AssPr'][a] = np.sum(matches_count * ass_pr) / np.maximum(1, res['HOTA_TP'][a])
# Calculate final scores
res['LocA'] = np.maximum(1e-10, res['LocA']) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_array_fields:
res[field] = self._combine_sum(all_res, field)
for field in ['AssRe', 'AssPr', 'AssA']:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_array_fields:
if ignore_empty_classes:
res[field] = self._combine_sum(
{k: v for k, v in all_res.items()
if (v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()}, field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields + self.float_array_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values() if
(v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()],
axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_array_fields:
res[field] = self._combine_sum(all_res, field)
for field in ['AssRe', 'AssPr', 'AssA']:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
res['DetRe'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'])
res['DetPr'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FP'])
res['DetA'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'] + res['HOTA_FP'])
res['HOTA'] = np.sqrt(res['DetA'] * res['AssA'])
res['OWTA'] = np.sqrt(res['DetRe'] * res['AssA'])
res['HOTA(0)'] = res['HOTA'][0]
res['LocA(0)'] = res['LocA'][0]
res['HOTALocA(0)'] = res['HOTA(0)']*res['LocA(0)']
return res
def plot_single_tracker_results(self, table_res, tracker, cls, output_folder):
"""Create plot of results"""
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
res = table_res['COMBINED_SEQ']
styles_to_plot = ['r', 'b', 'g', 'b--', 'b:', 'g--', 'g:', 'm']
for name, style in zip(self.float_array_fields, styles_to_plot):
plt.plot(self.array_labels, res[name], style)
plt.xlabel('alpha')
plt.ylabel('score')
plt.title(tracker + ' - ' + cls)
plt.axis([0, 1, 0, 1])
legend = []
for name in self.float_array_fields:
legend += [name + ' (' + str(np.round(np.mean(res[name]), 2)) + ')']
plt.legend(legend, loc='lower left')
out_file = os.path.join(output_folder, cls + '_plot.pdf')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
plt.savefig(out_file)
plt.savefig(out_file.replace('.pdf', '.png'))
plt.clf()
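# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# The relations computed in _compute_final_fields() at a single alpha, with invented counts.
_tp, _fn, _fp = 70.0, 30.0, 20.0
_det_a = _tp / (_tp + _fn + _fp)        # DetA = 70 / 120, roughly 0.583
_ass_a = 0.5                            # assume an association accuracy of 0.5 at this alpha
_hota_alpha = np.sqrt(_det_a * _ass_a)  # HOTA at this alpha = sqrt(DetA * AssA), roughly 0.540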

View File

@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils
class Identity(_BaseMetric):
"""Class which implements the ID metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.5, # Similarity score threshold required for an IDTP match. Default 0.5.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['IDTP', 'IDFN', 'IDFP']
self.float_fields = ['IDF1', 'IDR', 'IDP']
self.fields = self.float_fields + self.integer_fields
self.summary_fields = self.fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates ID metrics for one sequence"""
# Initialise results
res = {}
for field in self.fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['IDFN'] = data['num_gt_dets']
return res
if data['num_gt_dets'] == 0:
res['IDFP'] = data['num_tracker_dets']
return res
# Variables counting global association
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros(data['num_gt_ids'])
tracker_id_count = np.zeros(data['num_tracker_ids'])
# First loop through each timestep and accumulate global track information.
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the potential matches between ids in each timestep
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
# Calculate the total number of dets for each gt_id and tracker_id.
gt_id_count[gt_ids_t] += 1
tracker_id_count[tracker_ids_t] += 1
# Calculate optimal assignment cost matrix for ID metrics
num_gt_ids = data['num_gt_ids']
num_tracker_ids = data['num_tracker_ids']
fp_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
fn_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
fp_mat[num_gt_ids:, :num_tracker_ids] = 1e10
fn_mat[:num_gt_ids, num_tracker_ids:] = 1e10
for gt_id in range(num_gt_ids):
fn_mat[gt_id, :num_tracker_ids] = gt_id_count[gt_id]
fn_mat[gt_id, num_tracker_ids + gt_id] = gt_id_count[gt_id]
for tracker_id in range(num_tracker_ids):
fp_mat[:num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
fp_mat[tracker_id + num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
fn_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count
fp_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count
# Hungarian algorithm
match_rows, match_cols = linear_sum_assignment(fn_mat + fp_mat)
# Accumulate basic statistics
res['IDFN'] = fn_mat[match_rows, match_cols].sum().astype(int)
res['IDFP'] = fp_mat[match_rows, match_cols].sum().astype(int)
res['IDTP'] = (gt_id_count.sum() - res['IDFN']).astype(int)
# Calculate final ID scores
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_fields:
if ignore_empty_classes:
res[field] = self._combine_sum({k: v for k, v in all_res.items()
if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps},
field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
res['IDR'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFN'])
res['IDP'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFP'])
res['IDF1'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + 0.5 * res['IDFP'] + 0.5 * res['IDFN'])
return res
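# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# IDF1 from invented counts, using the static helper above.
_id_example = Identity._compute_final_fields({'IDTP': 80, 'IDFN': 30, 'IDFP': 20})
# IDR = 80 / 110, IDP = 80 / 100, IDF1 = 80 / (80 + 0.5 * 20 + 0.5 * 30), roughly 0.762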

View File

@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from collections import defaultdict
from .. import utils
class IDEucl(_BaseMetric):
"""Class which implements the ID metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.4, # Similarity score threshold required for an IDTP match. Default: 0.4 for IDEucl.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
self.fields = ['IDEucl']
self.float_fields = self.fields
self.summary_fields = self.fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates IDEucl metrics for all frames"""
# Initialise results
res = {'IDEucl' : 0}
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0 or data['num_gt_dets'] == 0.:
return res
data['centroid'] = []
for t, gt_det in enumerate(data['gt_dets']):
data['centroid'].append(self._compute_centroid(gt_det))
oid_hid_cent = defaultdict(list)
oid_cent = defaultdict(list)
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
# Assumes the order of ids and boxes is consistent within `data`
for ind, gid in enumerate(gt_ids_t):
oid_cent[gid].append(data['centroid'][t][ind])
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
for m_gid, m_tid in zip(match_idx_gt, match_idx_tracker):
oid_hid_cent[gt_ids_t[m_gid], tracker_ids_t[m_tid]].append(data['centroid'][t][m_gid])
oid_hid_dist = {k : np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_hid_cent.items()}
oid_dist = {int(k) : np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_cent.items()}
unique_oid = np.unique([i[0] for i in oid_hid_dist.keys()]).tolist()
unique_hid = np.unique([i[1] for i in oid_hid_dist.keys()]).tolist()
o_len = len(unique_oid)
h_len = len(unique_hid)
dist_matrix = np.zeros((o_len, h_len))
for ((oid, hid), dist) in oid_hid_dist.items():
oid_ind = unique_oid.index(oid)
hid_ind = unique_hid.index(hid)
dist_matrix[oid_ind, hid_ind] = dist
# opt_hyp_dist contains GT ID : max dist covered by track
opt_hyp_dist = dict.fromkeys(oid_dist.keys(), 0.)
cost_matrix = np.max(dist_matrix) - dist_matrix
rows, cols = linear_sum_assignment(cost_matrix)
for (row, col) in zip(rows, cols):
value = dist_matrix[row, col]
opt_hyp_dist[int(unique_oid[row])] = value
assert len(opt_hyp_dist.keys()) == len(oid_dist.keys())
hyp_length = np.sum(list(opt_hyp_dist.values()))
gt_length = np.sum(list(oid_dist.values()))
id_eucl = np.mean([np.divide(a, b, out=np.zeros_like(a), where=b != 0) for a, b in zip(opt_hyp_dist.values(), oid_dist.values())])  # per-track ratios (currently unused)
res['IDEucl'] = np.divide(hyp_length, gt_length, out=np.zeros_like(hyp_length), where=gt_length!=0)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['IDEucl'] > 0 + np.finfo('float').eps], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.float_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res, len(all_res))
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.float_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res, len(all_res))
return res
@staticmethod
def _compute_centroid(box):
box = np.array(box)
if len(box.shape) == 1:
centroid = (box[0:2] + box[2:4])/2
else:
centroid = (box[:, 0:2] + box[:, 2:4])/2
return np.flip(centroid, axis=1)
@staticmethod
def _compute_final_fields(res, res_len):
"""
Exists only to match the signature of the original Identity class.
"""
return {k:v/res_len for k,v in res.items()}
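# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# _compute_centroid() appears to assume corner-format [x1, y1, x2, y2] boxes (one row per
# detection) and returns centroids flipped to (y, x) order; the box below is invented.
_boxes_example = np.array([[10., 20., 30., 60.]])
_centroids_example = IDEucl._compute_centroid(_boxes_example)  # -> array([[40., 20.]])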

View File

@@ -0,0 +1,310 @@
import numpy as np
import math
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_metric import _BaseMetric
from .. import _timing
class JAndF(_BaseMetric):
"""Class which implements the J&F metrics"""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['num_gt_tracks']
self.float_fields = ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay', 'J&F']
self.fields = self.float_fields + self.integer_fields
self.summary_fields = self.float_fields
self.optim_type = 'J' # possible values J, J&F
@_timing.time
def eval_sequence(self, data):
"""Returns J&F metrics for one sequence"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
num_timesteps = data['num_timesteps']
num_tracker_ids = data['num_tracker_ids']
num_gt_ids = data['num_gt_ids']
gt_dets = data['gt_dets']
tracker_dets = data['tracker_dets']
gt_ids = data['gt_ids']
tracker_ids = data['tracker_ids']
# get shape of frames
frame_shape = None
if num_gt_ids > 0:
for t in range(num_timesteps):
if len(gt_ids[t]) > 0:
frame_shape = gt_dets[t][0]['size']
break
elif num_tracker_ids > 0:
for t in range(num_timesteps):
if len(tracker_ids[t]) > 0:
frame_shape = tracker_dets[t][0]['size']
break
if frame_shape:
# append all zero masks for timesteps in which tracks do not have a detection
zero_padding = np.zeros(frame_shape, order='F').astype(np.uint8)
padding_mask = mask_utils.encode(zero_padding)
for t in range(num_timesteps):
gt_id_det_mapping = {gt_ids[t][i]: gt_dets[t][i] for i in range(len(gt_ids[t]))}
gt_dets[t] = [gt_id_det_mapping[index] if index in gt_ids[t] else padding_mask for index
in range(num_gt_ids)]
tracker_id_det_mapping = {tracker_ids[t][i]: tracker_dets[t][i] for i in range(len(tracker_ids[t]))}
tracker_dets[t] = [tracker_id_det_mapping[index] if index in tracker_ids[t] else padding_mask for index
in range(num_tracker_ids)]
# also perform zero padding if number of tracker IDs < number of ground truth IDs
if num_tracker_ids < num_gt_ids:
diff = num_gt_ids - num_tracker_ids
for t in range(num_timesteps):
tracker_dets[t] = tracker_dets[t] + [padding_mask for _ in range(diff)]
num_tracker_ids += diff
j = self._compute_j(gt_dets, tracker_dets, num_gt_ids, num_tracker_ids, num_timesteps)
# boundary threshold for F computation
bound_th = 0.008
# perform matching
if self.optim_type == 'J&F':
f = np.zeros_like(j)
for k in range(num_tracker_ids):
for i in range(num_gt_ids):
f[k, i, :] = self._compute_f(gt_dets, tracker_dets, k, i, bound_th)
optim_metrics = (np.mean(j, axis=2) + np.mean(f, axis=2)) / 2
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
j_m = j[row_ind, col_ind, :]
f_m = f[row_ind, col_ind, :]
elif self.optim_type == 'J':
optim_metrics = np.mean(j, axis=2)
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
j_m = j[row_ind, col_ind, :]
f_m = np.zeros_like(j_m)
for i, (tr_ind, gt_ind) in enumerate(zip(row_ind, col_ind)):
f_m[i] = self._compute_f(gt_dets, tracker_dets, tr_ind, gt_ind, bound_th)
else:
raise TrackEvalException('Unsupported optimization type %s for J&F metric.' % self.optim_type)
# append zeros for false negatives
if j_m.shape[0] < data['num_gt_ids']:
diff = data['num_gt_ids'] - j_m.shape[0]
j_m = np.concatenate((j_m, np.zeros((diff, j_m.shape[1]))), axis=0)
f_m = np.concatenate((f_m, np.zeros((diff, f_m.shape[1]))), axis=0)
# compute the metrics for each ground truth track
res = {
'J-Mean': [np.nanmean(j_m[i, :]) for i in range(j_m.shape[0])],
'J-Recall': [np.nanmean(j_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(j_m.shape[0])],
'F-Mean': [np.nanmean(f_m[i, :]) for i in range(f_m.shape[0])],
'F-Recall': [np.nanmean(f_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(f_m.shape[0])],
'J-Decay': [],
'F-Decay': []
}
n_bins = 4
ids = np.round(np.linspace(1, data['num_timesteps'], n_bins + 1) + 1e-10) - 1
ids = ids.astype(np.uint8)
for k in range(j_m.shape[0]):
d_bins_j = [j_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
res['J-Decay'].append(np.nanmean(d_bins_j[0]) - np.nanmean(d_bins_j[3]))
for k in range(f_m.shape[0]):
d_bins_f = [f_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
res['F-Decay'].append(np.nanmean(d_bins_f[0]) - np.nanmean(d_bins_f[3]))
# count number of tracks for weighting of the result
res['num_gt_tracks'] = len(res['J-Mean'])
for field in ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay']:
res[field] = np.mean(res[field])
res['J&F'] = (res['J-Mean'] + res['F-Mean']) / 2
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.summary_fields:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='num_gt_tracks')
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values
'ignore empty classes' is not yet implemented here.
"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.float_fields:
res[field] = np.mean([v[field] for v in all_res.values()])
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.float_fields:
res[field] = np.mean([v[field] for v in all_res.values()])
return res
@staticmethod
def _seg2bmap(seg, width=None, height=None):
"""
From a segmentation, compute a binary boundary map with 1 pixel wide
boundaries. The boundary pixels are offset by 1/2 pixel towards the
origin from the actual segment boundary.
Arguments:
seg : Segments labeled from 1..k.
width : Width of desired bmap <= seg.shape[1]
height : Height of desired bmap <= seg.shape[0]
Returns:
bmap (ndarray): Binary boundary map.
David Martin <dmartin@eecs.berkeley.edu>
January 2003
"""
seg = seg.astype(bool)
seg[seg > 0] = 1
assert np.atleast_3d(seg).shape[2] == 1
width = seg.shape[1] if width is None else width
height = seg.shape[0] if height is None else height
h, w = seg.shape[:2]
ar1 = float(width) / float(height)
ar2 = float(w) / float(h)
assert not (
width > w or height > h or abs(ar1 - ar2) > 0.01
), "Can't convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
e = np.zeros_like(seg)
s = np.zeros_like(seg)
se = np.zeros_like(seg)
e[:, :-1] = seg[:, 1:]
s[:-1, :] = seg[1:, :]
se[:-1, :-1] = seg[1:, 1:]
b = seg ^ e | seg ^ s | seg ^ se
b[-1, :] = seg[-1, :] ^ e[-1, :]
b[:, -1] = seg[:, -1] ^ s[:, -1]
b[-1, -1] = 0
if w == width and h == height:
bmap = b
else:
bmap = np.zeros((height, width))
for x in range(w):
for y in range(h):
if b[y, x]:
j = 1 + math.floor((y - 1) + height / h)
i = 1 + math.floor((x - 1) + width / h)
bmap[j, i] = 1
return bmap
@staticmethod
def _compute_f(gt_data, tracker_data, tracker_data_id, gt_id, bound_th):
"""
Perform F computation for a given gt and a given tracker ID. Adapted from
https://github.com/davisvideochallenge/davis2017-evaluation
:param gt_data: the encoded gt masks
:param tracker_data: the encoded tracker masks
:param tracker_data_id: the tracker ID
:param gt_id: the ground truth ID
:param bound_th: boundary threshold parameter
:return: the F value for the given tracker and gt ID
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
from skimage.morphology import disk
import cv2
f = np.zeros(len(gt_data))
for t, (gt_masks, tracker_masks) in enumerate(zip(gt_data, tracker_data)):
curr_tracker_mask = mask_utils.decode(tracker_masks[tracker_data_id])
curr_gt_mask = mask_utils.decode(gt_masks[gt_id])
bound_pix = bound_th if bound_th >= 1 - np.finfo('float').eps else \
np.ceil(bound_th * np.linalg.norm(curr_tracker_mask.shape))
# Get the pixel boundaries of both masks
fg_boundary = JAndF._seg2bmap(curr_tracker_mask)
gt_boundary = JAndF._seg2bmap(curr_gt_mask)
# fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
fg_dil = cv2.dilate(fg_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
# gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
gt_dil = cv2.dilate(gt_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
# Get the intersection
gt_match = gt_boundary * fg_dil
fg_match = fg_boundary * gt_dil
# Area of the intersection
n_fg = np.sum(fg_boundary)
n_gt = np.sum(gt_boundary)
# % Compute precision and recall
if n_fg == 0 and n_gt > 0:
precision = 1
recall = 0
elif n_fg > 0 and n_gt == 0:
precision = 0
recall = 1
elif n_fg == 0 and n_gt == 0:
precision = 1
recall = 1
else:
precision = np.sum(fg_match) / float(n_fg)
recall = np.sum(gt_match) / float(n_gt)
# Compute F measure
if precision + recall == 0:
f_val = 0
else:
f_val = 2 * precision * recall / (precision + recall)
f[t] = f_val
return f
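# Worked example (hypothetical numbers): with 40 boundary pixels in the tracker mask,
# 50 in the gt mask, 30 tracker-boundary pixels falling inside the dilated gt boundary
# and 35 gt-boundary pixels inside the dilated tracker boundary, precision = 30/40 = 0.75,
# recall = 35/50 = 0.70 and F = 2 * 0.75 * 0.70 / (0.75 + 0.70) ≈ 0.72.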
@staticmethod
def _compute_j(gt_data, tracker_data, num_gt_ids, num_tracker_ids, num_timesteps):
"""
Computation of J value for all ground truth IDs and all tracker IDs in the given sequence. Adapted from
https://github.com/davisvideochallenge/davis2017-evaluation
:param gt_data: the ground truth masks
:param tracker_data: the tracker masks
:param num_gt_ids: the number of ground truth IDs
:param num_tracker_ids: the number of tracker IDs
:param num_timesteps: the number of timesteps
:return: the J values
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
j = np.zeros((num_tracker_ids, num_gt_ids, num_timesteps))
for t, (time_gt, time_data) in enumerate(zip(gt_data, tracker_data)):
# run length encoded masks with pycocotools
area_gt = mask_utils.area(time_gt)
time_data = list(time_data)
area_tr = mask_utils.area(time_data)
area_tr = np.repeat(area_tr[:, np.newaxis], len(area_gt), axis=1)
area_gt = np.repeat(area_gt[np.newaxis, :], len(area_tr), axis=0)
# mask iou computation with pycocotools
ious = np.atleast_2d(mask_utils.iou(time_data, time_gt, [0]*len(time_gt)))
# set iou to 1 if both masks are close to 0 (no ground truth and no predicted mask in timestep)
ious[np.isclose(area_tr, 0) & np.isclose(area_gt, 0)] = 1
assert (ious >= 0 - np.finfo('float').eps).all()
assert (ious <= 1 + np.finfo('float').eps).all()
j[..., t] = ious
return j
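# Illustrative note on the assumed inputs (inferred from the pycocotools calls above):
# gt_data and tracker_data are lists over timesteps, each holding the COCO RLE-encoded
# masks of one frame, e.g.
#   rle = mask_utils.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
# The returned array j[tracker_id, gt_id, t] is the mask IoU of that pair at timestep t,
# set to 1 when both masks are empty.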

View File

@@ -0,0 +1,462 @@
import numpy as np
from ._base_metric import _BaseMetric
from .. import _timing
from functools import partial
from .. import utils
from ..utils import TrackEvalException
class TrackMAP(_BaseMetric):
"""Class which implements the TrackMAP metrics"""
@staticmethod
def get_default_metric_config():
"""Default class config values"""
default_config = {
'USE_AREA_RANGES': True, # whether to evaluate for certain area ranges
'AREA_RANGES': [[0 ** 2, 32 ** 2], # additional area range sets for which TrackMAP is evaluated
[32 ** 2, 96 ** 2], # (all area range always included), default values for TAO
[96 ** 2, 1e5 ** 2]], # evaluation
'AREA_RANGE_LABELS': ["area_s", "area_m", "area_l"], # the labels for the area ranges
'USE_TIME_RANGES': True, # whether to evaluate for certain time ranges (length of tracks)
'TIME_RANGES': [[0, 3], [3, 10], [10, 1e5]], # additional time range sets for which TrackMAP is evaluated
# (all time range always included) , default values for TAO evaluation
'TIME_RANGE_LABELS': ["time_s", "time_m", "time_l"], # the labels for the time ranges
'IOU_THRESHOLDS': np.arange(0.5, 0.96, 0.05), # the IoU thresholds
'RECALL_THRESHOLDS': np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01) + 1), endpoint=True),
# recall thresholds at which precision is evaluated
'MAX_DETECTIONS': 0, # limit the maximum number of considered tracks per sequence (0 for unlimited)
'PRINT_CONFIG': True
}
return default_config
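# Minimal usage sketch (keys as above, values hypothetical): passing e.g.
#   TrackMAP(config={'USE_TIME_RANGES': False, 'MAX_DETECTIONS': 50})
# keeps the default area-range splits, drops the time-range splits and caps the number
# of scored tracks per sequence at 50; unspecified keys fall back to the defaults.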
def __init__(self, config=None):
super().__init__()
self.config = utils.init_config(config, self.get_default_metric_config(), self.get_name())
self.num_ig_masks = 1
self.lbls = ['all']
self.use_area_rngs = self.config['USE_AREA_RANGES']
if self.use_area_rngs:
self.area_rngs = self.config['AREA_RANGES']
self.area_rng_lbls = self.config['AREA_RANGE_LABELS']
self.num_ig_masks += len(self.area_rng_lbls)
self.lbls += self.area_rng_lbls
self.use_time_rngs = self.config['USE_TIME_RANGES']
if self.use_time_rngs:
self.time_rngs = self.config['TIME_RANGES']
self.time_rng_lbls = self.config['TIME_RANGE_LABELS']
self.num_ig_masks += len(self.time_rng_lbls)
self.lbls += self.time_rng_lbls
self.array_labels = self.config['IOU_THRESHOLDS']
self.rec_thrs = self.config['RECALL_THRESHOLDS']
self.maxDet = self.config['MAX_DETECTIONS']
self.float_array_fields = ['AP_' + lbl for lbl in self.lbls] + ['AR_' + lbl for lbl in self.lbls]
self.fields = self.float_array_fields
self.summary_fields = self.float_array_fields
@_timing.time
def eval_sequence(self, data):
"""Calculates GT and Tracker matches for one sequence for TrackMAP metrics. Adapted from
https://github.com/TAO-Dataset/"""
# Initialise results to zero for each sequence as the fields are only defined over the set of all sequences
res = {}
for field in self.fields:
res[field] = [0 for _ in self.array_labels]
gt_ids, dt_ids = data['gt_track_ids'], data['dt_track_ids']
if len(gt_ids) == 0 and len(dt_ids) == 0:
for idx in range(self.num_ig_masks):
res[idx] = None
return res
# get track data
gt_tr_areas = data.get('gt_track_areas', None) if self.use_area_rngs else None
gt_tr_lengths = data.get('gt_track_lengths', None) if self.use_time_rngs else None
gt_tr_iscrowd = data.get('gt_track_iscrowd', None)
dt_tr_areas = data.get('dt_track_areas', None) if self.use_area_rngs else None
dt_tr_lengths = data.get('dt_track_lengths', None) if self.use_time_rngs else None
is_nel = data.get('not_exhaustively_labeled', False)
# compute ignore masks for different track sets to eval
gt_ig_masks = self._compute_track_ig_masks(len(gt_ids), track_lengths=gt_tr_lengths, track_areas=gt_tr_areas,
iscrowd=gt_tr_iscrowd)
dt_ig_masks = self._compute_track_ig_masks(len(dt_ids), track_lengths=dt_tr_lengths, track_areas=dt_tr_areas,
is_not_exhaustively_labeled=is_nel, is_gt=False)
boxformat = data.get('boxformat', 'xywh')
ious = self._compute_track_ious(data['dt_tracks'], data['gt_tracks'], iou_function=data['iou_type'],
boxformat=boxformat)
for mask_idx in range(self.num_ig_masks):
gt_ig_mask = gt_ig_masks[mask_idx]
# Sort gt ignore last
gt_idx = np.argsort([g for g in gt_ig_mask], kind="mergesort")
gt_ids = [gt_ids[i] for i in gt_idx]
ious_sorted = ious[:, gt_idx] if len(ious) > 0 else ious
num_thrs = len(self.array_labels)
num_gt = len(gt_ids)
num_dt = len(dt_ids)
# Array to store the "id" of the matched dt/gt
gt_m = np.zeros((num_thrs, num_gt)) - 1
dt_m = np.zeros((num_thrs, num_dt)) - 1
gt_ig = np.array([gt_ig_mask[idx] for idx in gt_idx])
dt_ig = np.zeros((num_thrs, num_dt))
for iou_thr_idx, iou_thr in enumerate(self.array_labels):
if len(ious_sorted) == 0:
break
for dt_idx, _dt in enumerate(dt_ids):
iou = min([iou_thr, 1 - 1e-10])
# information about best match so far (m=-1 -> unmatched)
# store the gt_idx which matched for _dt
m = -1
for gt_idx, _ in enumerate(gt_ids):
# if this gt already matched continue
if gt_m[iou_thr_idx, gt_idx] > 0:
continue
# if _dt already matched to a regular gt and only ignore gts remain, stop
if m > -1 and gt_ig[m] == 0 and gt_ig[gt_idx] == 1:
break
# continue to next gt unless better match made
if ious_sorted[dt_idx, gt_idx] < iou - np.finfo('float').eps:
continue
# if match successful and best so far, store appropriately
iou = ious_sorted[dt_idx, gt_idx]
m = gt_idx
# No match found for _dt, go to next _dt
if m == -1:
continue
# if the matched gt is an ignore gt, propagate the ignore flag to dt_ig
# so that this detection is not used in evaluation.
dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
# _dt match found, update gt_m, and dt_m with "id"
dt_m[iou_thr_idx, dt_idx] = gt_ids[m]
gt_m[iou_thr_idx, m] = _dt
dt_ig_mask = dt_ig_masks[mask_idx]
dt_ig_mask = np.array(dt_ig_mask).reshape((1, num_dt)) # 1 X num_dt
dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0) # num_thrs X num_dt
# Based on dt_ig_mask ignore any unmatched detection by updating dt_ig
dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == -1, dt_ig_mask))
# store results for given video and category
res[mask_idx] = {
"dt_ids": dt_ids,
"gt_ids": gt_ids,
"dt_matches": dt_m,
"gt_matches": gt_m,
"dt_scores": data['dt_track_scores'],
"gt_ignore": gt_ig,
"dt_ignore": dt_ig,
}
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences. Computes precision and recall values based on track matches.
Adapted from https://github.com/TAO-Dataset/
"""
num_thrs = len(self.array_labels)
num_recalls = len(self.rec_thrs)
# -1 for absent categories
precision = -np.ones(
(num_thrs, num_recalls, self.num_ig_masks)
)
recall = -np.ones((num_thrs, self.num_ig_masks))
for ig_idx in range(self.num_ig_masks):
ig_idx_results = [res[ig_idx] for res in all_res.values() if res[ig_idx] is not None]
# Remove elements which are None
if len(ig_idx_results) == 0:
continue
# Append all scores: shape (N,)
# limit considered tracks for each sequence if maxDet > 0
if self.maxDet == 0:
dt_scores = np.concatenate([res["dt_scores"] for res in ig_idx_results], axis=0)
dt_idx = np.argsort(-dt_scores, kind="mergesort")
dt_m = np.concatenate([e["dt_matches"] for e in ig_idx_results],
axis=1)[:, dt_idx]
dt_ig = np.concatenate([e["dt_ignore"] for e in ig_idx_results],
axis=1)[:, dt_idx]
elif self.maxDet > 0:
dt_scores = np.concatenate([res["dt_scores"][0:self.maxDet] for res in ig_idx_results], axis=0)
dt_idx = np.argsort(-dt_scores, kind="mergesort")
dt_m = np.concatenate([e["dt_matches"][:, 0:self.maxDet] for e in ig_idx_results],
axis=1)[:, dt_idx]
dt_ig = np.concatenate([e["dt_ignore"][:, 0:self.maxDet] for e in ig_idx_results],
axis=1)[:, dt_idx]
else:
raise Exception("Number of maximum detections must be >= 0, but is set to %i" % self.maxDet)
gt_ig = np.concatenate([res["gt_ignore"] for res in ig_idx_results])
# num gt anns to consider
num_gt = np.count_nonzero(gt_ig == 0)
if num_gt == 0:
continue
tps = np.logical_and(dt_m != -1, np.logical_not(dt_ig))
fps = np.logical_and(dt_m == -1, np.logical_not(dt_ig))
tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
tp = np.array(tp)
fp = np.array(fp)
num_tp = len(tp)
rc = tp / num_gt
if num_tp:
recall[iou_thr_idx, ig_idx] = rc[-1]
else:
recall[iou_thr_idx, ig_idx] = 0
# np.spacing(1) ~= eps
pr = tp / (fp + tp + np.spacing(1))
pr = pr.tolist()
# Ensure precision values are monotonically decreasing
for i in range(num_tp - 1, 0, -1):
if pr[i] > pr[i - 1]:
pr[i - 1] = pr[i]
# find indices at the predefined recall values
rec_thrs_insert_idx = np.searchsorted(rc, self.rec_thrs, side="left")
pr_at_recall = [0.0] * num_recalls
try:
for _idx, pr_idx in enumerate(rec_thrs_insert_idx):
pr_at_recall[_idx] = pr[pr_idx]
except IndexError:
pass
precision[iou_thr_idx, :, ig_idx] = (np.array(pr_at_recall))
res = {'precision': precision, 'recall': recall}
# compute the precision and recall averages for the respective alpha thresholds and ignore masks
for lbl in self.lbls:
res['AP_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
res['AR_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
for a_id, alpha in enumerate(self.array_labels):
for lbl_idx, lbl in enumerate(self.lbls):
p = precision[a_id, :, lbl_idx]
if len(p[p > -1]) == 0:
mean_p = -1
else:
mean_p = np.mean(p[p > -1])
res['AP_' + lbl][a_id] = mean_p
res['AR_' + lbl][a_id] = recall[a_id, lbl_idx]
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
"""Combines metrics across all classes by averaging over the class values
Note mAP is not well defined for 'empty classes' so 'ignore empty classes' is always true here.
"""
res = {}
for field in self.fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
field_stacked = np.array([res[field] for res in all_res.values()])
for a_id, alpha in enumerate(self.array_labels):
values = field_stacked[:, a_id]
if len(values[values > -1]) == 0:
mean = -1
else:
mean = np.mean(values[values > -1])
res[field][a_id] = mean
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
field_stacked = np.array([res[field] for res in all_res.values()])
for a_id, alpha in enumerate(self.array_labels):
values = field_stacked[:, a_id]
if len(values[values > -1]) == 0:
mean = -1
else:
mean = np.mean(values[values > -1])
res[field][a_id] = mean
return res
def _compute_track_ig_masks(self, num_ids, track_lengths=None, track_areas=None, iscrowd=None,
is_not_exhaustively_labeled=False, is_gt=True):
"""
Computes ignore masks for different track sets to evaluate
:param num_ids: the number of track IDs
:param track_lengths: the lengths of the tracks (number of timesteps)
:param track_areas: the average area of a track
:param iscrowd: whether a track is marked as crowd
:param is_not_exhaustively_labeled: whether the track category is not exhaustively labeled
:param is_gt: whether it is gt
:return: the track ignore masks
"""
# for TAO, tracks of classes which are not exhaustively labeled are not evaluated
if not is_gt and is_not_exhaustively_labeled:
track_ig_masks = [[1 for _ in range(num_ids)] for i in range(self.num_ig_masks)]
else:
# consider all tracks
track_ig_masks = [[0 for _ in range(num_ids)]]
# consider tracks with certain area
if self.use_area_rngs:
for rng in self.area_rngs:
track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= area <= rng[1] + np.finfo('float').eps
else 1 for area in track_areas])
# consider tracks with certain duration
if self.use_time_rngs:
for rng in self.time_rngs:
track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= length
<= rng[1] + np.finfo('float').eps else 1 for length in track_lengths])
# for YouTubeVIS evaluation, tracks with the crowd tag are not evaluated
if is_gt and iscrowd:
track_ig_masks = [np.logical_or(mask, iscrowd) for mask in track_ig_masks]
return track_ig_masks
@staticmethod
def _compute_bb_track_iou(dt_track, gt_track, boxformat='xywh'):
"""
Calculates the track IoU for one detected track and one ground truth track for bounding boxes
:param dt_track: the detected track (format: dictionary with frame index as keys and
numpy arrays as values)
:param gt_track: the ground truth track (format: dictionary with frame index as keys and
numpy array as values)
:param boxformat: the format of the boxes
:return: the track IoU
"""
intersect = 0
union = 0
image_ids = set(gt_track.keys()) | set(dt_track.keys())
for image in image_ids:
g = gt_track.get(image, None)
d = dt_track.get(image, None)
if boxformat == 'xywh':
if d is not None and g is not None:
dx, dy, dw, dh = d
gx, gy, gw, gh = g
w = max(min(dx + dw, gx + gw) - max(dx, gx), 0)
h = max(min(dy + dh, gy + gh) - max(dy, gy), 0)
i = w * h
u = dw * dh + gw * gh - i
intersect += i
union += u
elif d is None and g is not None:
union += g[2] * g[3]
elif d is not None and g is None:
union += d[2] * d[3]
elif boxformat == 'x0y0x1y1':
if d is not None and g is not None:
dx0, dy0, dx1, dy1 = d
gx0, gy0, gx1, gy1 = g
w = max(min(dx1, gx1) - max(dx0, gx0), 0)
h = max(min(dy1, gy1) - max(dy0, gy0), 0)
i = w * h
u = (dx1 - dx0) * (dy1 - dy0) + (gx1 - gx0) * (gy1 - gy0) - i
intersect += i
union += u
elif d is None and g is not None:
union += (g[2] - g[0]) * (g[3] - g[1])
elif d is not None and g is None:
union += (d[2] - d[0]) * (d[3] - d[1])
else:
raise TrackEvalException('BoxFormat not implemented')
if intersect > union:
raise TrackEvalException("Intersection value > union value. Are the box values corrupted?")
return intersect / union if union > 0 else 0
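# Worked example (hypothetical xywh boxes): a detected track and a gt track sharing a
# single frame with boxes [0, 0, 10, 10] and [5, 5, 10, 10] intersect in a 5x5 region,
# so intersect = 25, union = 100 + 100 - 25 = 175 and the track IoU is 25/175 ≈ 0.14;
# frames where only one of the two tracks exists add to the union only.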
@staticmethod
def _compute_mask_track_iou(dt_track, gt_track):
"""
Calculates the track IoU for one detected track and one ground truth track for segmentation masks
:param dt_track: the detected track (format: dictionary with frame index as keys and
pycocotools rle encoded masks as values)
:param gt_track: the ground truth track (format: dictionary with frame index as keys and
pycocotools rle encoded masks as values)
:return: the track IoU
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
intersect = .0
union = .0
image_ids = set(gt_track.keys()) | set(dt_track.keys())
for image in image_ids:
g = gt_track.get(image, None)
d = dt_track.get(image, None)
if d and g:
intersect += mask_utils.area(mask_utils.merge([d, g], True))
union += mask_utils.area(mask_utils.merge([d, g], False))
elif not d and g:
union += mask_utils.area(g)
elif d and not g:
union += mask_utils.area(d)
if union < 0.0 - np.finfo('float').eps:
raise TrackEvalException("Union value < 0. Are the segmentaions corrupted?")
if intersect > union:
raise TrackEvalException("Intersection value > union value. Are the segmentations corrupted?")
iou = intersect / union if union > 0.0 + np.finfo('float').eps else 0.0
return iou
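# Sketch of the expected track format (an assumption based on the calls above): each
# track is a dict mapping frame index -> COCO RLE mask, e.g.
#   dt_track = {0: mask_utils.encode(np.asfortranarray(binary_mask.astype(np.uint8)))}
# mask_utils.merge with intersect=True/False then gives the per-frame intersection and
# union areas that are accumulated over the union of frame indices.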
@staticmethod
def _compute_track_ious(dt, gt, iou_function='bbox', boxformat='xywh'):
"""
Calculate track IoUs for a set of ground truth tracks and a set of detected tracks
"""
if len(gt) == 0 and len(dt) == 0:
return []
if iou_function == 'bbox':
track_iou_function = partial(TrackMAP._compute_bb_track_iou, boxformat=boxformat)
elif iou_function == 'mask':
track_iou_function = partial(TrackMAP._compute_mask_track_iou)
else:
raise Exception('IoU function not implemented')
ious = np.zeros([len(dt), len(gt)])
for i, j in np.ndindex(ious.shape):
ious[i, j] = track_iou_function(dt[i], gt[j])
return ious
@staticmethod
def _row_print(*argv):
"""Prints results in an evenly spaced rows, with more space in first row"""
if len(argv) == 1:
argv = argv[0]
to_print = '%-40s' % argv[0]
for v in argv[1:]:
to_print += '%-12s' % str(v)
print(to_print)

View File

@@ -0,0 +1,131 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
class VACE(_BaseMetric):
"""Class which implements the VACE metrics.
The metrics are described in:
Manohar et al. (2006) "Performance Evaluation of Object Detection and Tracking in Video"
https://link.springer.com/chapter/10.1007/11612704_16
This implementation uses the "relaxed" variant of the metrics,
where an overlap threshold is applied in each frame.
"""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['VACE_IDs', 'VACE_GT_IDs', 'num_non_empty_timesteps']
self.float_fields = ['STDA', 'ATA', 'FDA', 'SFDA']
self.fields = self.integer_fields + self.float_fields
self.summary_fields = ['SFDA', 'ATA']
# Fields that are accumulated over multiple videos.
self._additive_fields = self.integer_fields + ['STDA', 'FDA']
self.threshold = 0.5
@_timing.time
def eval_sequence(self, data):
"""Calculates VACE metrics for one sequence.
Depends on the fields:
data['num_gt_ids']
data['num_tracker_ids']
data['gt_ids']
data['tracker_ids']
data['similarity_scores']
"""
res = {}
# Obtain Average Tracking Accuracy (ATA) using track correspondence.
# Obtain counts necessary to compute temporal IOU.
# Assume that integer counts can be represented exactly as floats.
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros(data['num_gt_ids'])
tracker_id_count = np.zeros(data['num_tracker_ids'])
both_present_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the number of frames in which two tracks satisfy the overlap criterion.
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
# Count the number of frames in which the tracks are present.
gt_id_count[gt_ids_t] += 1
tracker_id_count[tracker_ids_t] += 1
both_present_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += 1
# Number of frames in which either track is present (union of the two sets of frames).
union_count = (gt_id_count[:, np.newaxis]
+ tracker_id_count[np.newaxis, :]
- both_present_count)
# The denominator should always be non-zero if all tracks are non-empty.
with np.errstate(divide='raise', invalid='raise'):
temporal_iou = potential_matches_count / union_count
# Find assignment that maximizes temporal IOU.
match_rows, match_cols = linear_sum_assignment(-temporal_iou)
res['STDA'] = temporal_iou[match_rows, match_cols].sum()
res['VACE_IDs'] = data['num_tracker_ids']
res['VACE_GT_IDs'] = data['num_gt_ids']
# Obtain Frame Detection Accuracy (FDA) using per-frame correspondence.
non_empty_count = 0
fda = 0
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
n_g = len(gt_ids_t)
n_d = len(tracker_ids_t)
if not (n_g or n_d):
continue
# n_g > 0 or n_d > 0
non_empty_count += 1
if not (n_g and n_d):
continue
# n_g > 0 and n_d > 0
spatial_overlap = data['similarity_scores'][t]
match_rows, match_cols = linear_sum_assignment(-spatial_overlap)
overlap_ratio = spatial_overlap[match_rows, match_cols].sum()
fda += overlap_ratio / (0.5 * (n_g + n_d))
res['FDA'] = fda
res['num_non_empty_timesteps'] = non_empty_count
res.update(self._compute_final_fields(res))
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['VACE_GT_IDs'] > 0 or v['VACE_IDs'] > 0], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self._additive_fields:
res[field] = _BaseMetric._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for header in self._additive_fields:
res[header] = _BaseMetric._combine_sum(all_res, header)
res.update(self._compute_final_fields(res))
return res
@staticmethod
def _compute_final_fields(additive):
final = {}
with np.errstate(invalid='ignore'): # Permit nan results.
final['ATA'] = (additive['STDA'] /
(0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs'])))
final['SFDA'] = additive['FDA'] / additive['num_non_empty_timesteps']
return final
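# Worked example (hypothetical counts): with STDA = 3.0 accumulated over 4 tracker
# tracks and 6 gt tracks, ATA = 3.0 / (0.5 * (4 + 6)) = 0.6; with FDA = 45.0 over
# 100 non-empty timesteps, SFDA = 0.45.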

View File

@@ -0,0 +1,230 @@
import os
import numpy as np
from .utils import TrackEvalException
def plot_compare_trackers(tracker_folder, tracker_list, cls, output_folder, plots_list=None):
"""Create plots which compare metrics across different trackers."""
# Define what to plot
if plots_list is None:
plots_list = get_default_plots_list()
# Load data
data = load_multiple_tracker_summaries(tracker_folder, tracker_list, cls)
out_loc = os.path.join(output_folder, cls)
# Plot
for args in plots_list:
create_comparison_plot(data, out_loc, *args)
def get_default_plots_list():
# y_label, x_label, sort_label, bg_label, bg_function
plots_list = [
['AssA', 'DetA', 'HOTA', 'HOTA', 'geometric_mean'],
['AssPr', 'AssRe', 'HOTA', 'AssA', 'jaccard'],
['DetPr', 'DetRe', 'HOTA', 'DetA', 'jaccard'],
['HOTA(0)', 'LocA(0)', 'HOTA', 'HOTALocA(0)', 'multiplication'],
['HOTA', 'LocA', 'HOTA', None, None],
['HOTA', 'MOTA', 'HOTA', None, None],
['HOTA', 'IDF1', 'HOTA', None, None],
['IDF1', 'MOTA', 'HOTA', None, None],
]
return plots_list
def load_multiple_tracker_summaries(tracker_folder, tracker_list, cls):
"""Loads summary data for multiple trackers."""
data = {}
for tracker in tracker_list:
with open(os.path.join(tracker_folder, tracker, cls + '_summary.txt')) as f:
keys = next(f).split(' ')
done = False
while not done:
values = next(f).split(' ')
if len(values) == len(keys):
done = True
data[tracker] = dict(zip(keys, map(float, values)))
return data
def create_comparison_plot(data, out_loc, y_label, x_label, sort_label, bg_label=None, bg_function=None, settings=None):
""" Creates a scatter plot comparing multiple trackers between two metric fields, with one on the x-axis and the
other on the y axis. Adds pareto optical lines and (optionally) a background contour.
Inputs:
data: dict of dicts such that data[tracker_name][metric_field_name] = float
y_label: the metric_field_name to be plotted on the y-axis
x_label: the metric_field_name to be plotted on the x-axis
sort_label: the metric_field_name by which trackers are ordered and ranked
bg_label: the metric_field_name by which (optional) background contours are plotted
bg_function: the (optional) function bg_function(x,y) which converts the x_label / y_label values into bg_label.
settings: dict of plot settings with keys:
'gap_val': gap between axis ticks and bg curves.
'num_to_plot': maximum number of trackers to plot
"""
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Get plot settings
if settings is None:
gap_val = 2
num_to_plot = 20
else:
gap_val = settings['gap_val']
num_to_plot = settings['num_to_plot']
if (bg_label is None) != (bg_function is None):
raise TrackEvalException('bg_function and bg_label must either be both given or neither given.')
# Extract data
tracker_names = np.array(list(data.keys()))
sort_index = np.array([data[t][sort_label] for t in tracker_names]).argsort()[::-1]
x_values = np.array([data[t][x_label] for t in tracker_names])[sort_index][:num_to_plot]
y_values = np.array([data[t][y_label] for t in tracker_names])[sort_index][:num_to_plot]
# Print info on what is being plotted
tracker_names = tracker_names[sort_index][:num_to_plot]
print('\nPlotting %s vs %s, for the following (ordered) trackers:' % (y_label, x_label))
for i, name in enumerate(tracker_names):
print('%i: %s' % (i+1, name))
# Find best fitting boundaries for data
boundaries = _get_boundaries(x_values, y_values, round_val=gap_val/2)
fig = plt.figure()
# Plot background contour
if bg_function is not None:
_plot_bg_contour(bg_function, boundaries, gap_val)
# Plot pareto optimal lines
_plot_pareto_optimal_lines(x_values, y_values)
# Plot data points with number labels
labels = np.arange(len(y_values)) + 1
plt.plot(x_values, y_values, 'b.', markersize=15)
for xx, yy, l in zip(x_values, y_values, labels):
plt.text(xx, yy, str(l), color="red", fontsize=15)
# Add extra explanatory text to plots
plt.text(0, -0.11, 'label order:\nHOTA', horizontalalignment='left', verticalalignment='center',
transform=fig.axes[0].transAxes, color="red", fontsize=12)
if bg_label is not None:
plt.text(1, -0.11, 'curve values:\n' + bg_label, horizontalalignment='right', verticalalignment='center',
transform=fig.axes[0].transAxes, color="grey", fontsize=12)
plt.xlabel(x_label, fontsize=15)
plt.ylabel(y_label, fontsize=15)
title = y_label + ' vs ' + x_label
if bg_label is not None:
title += ' (' + bg_label + ')'
plt.title(title, fontsize=17)
plt.xticks(np.arange(0, 100, gap_val))
plt.yticks(np.arange(0, 100, gap_val))
min_x, max_x, min_y, max_y = boundaries
plt.xlim(min_x, max_x)
plt.ylim(min_y, max_y)
plt.gca().set_aspect('equal', adjustable='box')
plt.tight_layout()
os.makedirs(out_loc, exist_ok=True)
filename = os.path.join(out_loc, title.replace(' ', '_'))
plt.savefig(filename + '.pdf', bbox_inches='tight', pad_inches=0.05)
plt.savefig(filename + '.png', bbox_inches='tight', pad_inches=0.05)
def _get_boundaries(x_values, y_values, round_val):
x1 = np.min(np.floor((x_values - 0.5) / round_val) * round_val)
x2 = np.max(np.ceil((x_values + 0.5) / round_val) * round_val)
y1 = np.min(np.floor((y_values - 0.5) / round_val) * round_val)
y2 = np.max(np.ceil((y_values + 0.5) / round_val) * round_val)
x_range = x2 - x1
y_range = y2 - y1
max_range = max(x_range, y_range)
x_center = (x1 + x2) / 2
y_center = (y1 + y2) / 2
min_x = max(x_center - max_range / 2, 0)
max_x = min(x_center + max_range / 2, 100)
min_y = max(y_center - max_range / 2, 0)
max_y = min(y_center + max_range / 2, 100)
return min_x, max_x, min_y, max_y
def geometric_mean(x, y):
return np.sqrt(x * y)
def jaccard(x, y):
x = x / 100
y = y / 100
return 100 * (x * y) / (x + y - x * y)
def multiplication(x, y):
return x * y / 100
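# Illustrative values: for x = y = 50, geometric_mean gives 50.0,
# jaccard gives 100 * 0.25 / 0.75 ≈ 33.3 and multiplication gives 25.0.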
bg_function_dict = {
"geometric_mean": geometric_mean,
"jaccard": jaccard,
"multiplication": multiplication,
}
def _plot_bg_contour(bg_function, plot_boundaries, gap_val):
""" Plot background contour. """
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Plot background contour
min_x, max_x, min_y, max_y = plot_boundaries
x = np.arange(min_x, max_x, 0.1)
y = np.arange(min_y, max_y, 0.1)
x_grid, y_grid = np.meshgrid(x, y)
if bg_function in bg_function_dict.keys():
z_grid = bg_function_dict[bg_function](x_grid, y_grid)
else:
raise TrackEvalException("background plotting function '%s' is not defined." % bg_function)
levels = np.arange(0, 100, gap_val)
con = plt.contour(x_grid, y_grid, z_grid, levels, colors='grey')
def bg_format(val):
s = '{:1f}'.format(val)
return '{:.0f}'.format(val) if s[-1] == '0' else s
con.levels = [bg_format(val) for val in con.levels]
plt.clabel(con, con.levels, inline=True, fmt='%r', fontsize=8)
def _plot_pareto_optimal_lines(x_values, y_values):
""" Plot pareto optimal lines """
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Plot pareto optimal lines
cxs = x_values
cys = y_values
best_y = np.argmax(cys)
x_pareto = [0, cxs[best_y]]
y_pareto = [cys[best_y], cys[best_y]]
t = 2
remaining = cxs > x_pareto[t - 1]
cys = cys[remaining]
cxs = cxs[remaining]
while len(cxs) > 0 and len(cys) > 0:
best_y = np.argmax(cys)
x_pareto += [x_pareto[t - 1], cxs[best_y]]
y_pareto += [cys[best_y], cys[best_y]]
t += 2
remaining = cxs > x_pareto[t - 1]
cys = cys[remaining]
cxs = cxs[remaining]
x_pareto.append(x_pareto[t - 1])
y_pareto.append(0)
plt.plot(np.array(x_pareto), np.array(y_pareto), '--r')

View File

@@ -0,0 +1,146 @@
import os
import csv
import argparse
from collections import OrderedDict
def init_config(config, default_config, name=None):
"""Initialise non-given config values with defaults"""
if config is None:
config = default_config
else:
for k in default_config.keys():
if k not in config.keys():
config[k] = default_config[k]
if name and config['PRINT_CONFIG']:
print('\n%s Config:' % name)
for c in config.keys():
print('%-20s : %-30s' % (c, config[c]))
return config
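# Minimal usage sketch (hypothetical caller): a metric or dataset class typically calls
#   config = init_config(user_config, get_default_config(), 'MyMetric')
# so that any key missing from user_config falls back to its default and the merged
# config is printed when PRINT_CONFIG is set.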
def update_config(config):
"""
Parses the command-line arguments of a script and updates the corresponding config values when they are specified.
:param config: the config to update
:return: the updated config
"""
parser = argparse.ArgumentParser()
for setting in config.keys():
if type(config[setting]) == list or type(config[setting]) == type(None):
parser.add_argument("--" + setting, nargs='+')
else:
parser.add_argument("--" + setting)
args = parser.parse_args().__dict__
for setting in args.keys():
if args[setting] is not None:
if type(config[setting]) == type(True):
if args[setting] == 'True':
x = True
elif args[setting] == 'False':
x = False
else:
raise Exception('Command line parameter ' + setting + ' must be True or False')
elif type(config[setting]) == type(1):
x = int(args[setting])
elif type(args[setting]) == type(None):
x = None
else:
x = args[setting]
config[setting] = x
return config
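# Example (hypothetical script): if the config contains {'SPLIT': 'test', 'PRINT_CONFIG': True},
# running
#   python run.py --SPLIT val --PRINT_CONFIG False
# overrides SPLIT with the string 'val' and parses PRINT_CONFIG back into the boolean False;
# list- or None-valued settings accept several values via nargs='+'.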
def get_code_path():
"""Get base path where code is"""
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
def validate_metrics_list(metrics_list):
"""Get names of metric class and ensures they are unique, further checks that the fields within each metric class
do not have overlapping names.
"""
metric_names = [metric.get_name() for metric in metrics_list]
# check metric names are unique
if len(metric_names) != len(set(metric_names)):
raise TrackEvalException('Code being run with multiple metrics of the same name')
fields = []
for m in metrics_list:
fields += m.fields
# check metric fields are unique
if len(fields) != len(set(fields)):
raise TrackEvalException('Code being run with multiple metrics with fields of the same name')
return metric_names
def write_summary_results(summaries, cls, output_folder):
"""Write summary results to file"""
fields = sum([list(s.keys()) for s in summaries], [])
values = sum([list(s.values()) for s in summaries], [])
# In order to remain consistent as new fields are added, each of the following fields, if present, is output in
# the summary first, in the order below. Any further fields are output in the order each metric family is called,
# and within each family either in the order they were added to the dict (python >= 3.6) or
# randomly (python < 3.6).
default_order = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA', 'HOTA(0)', 'LocA(0)',
'HOTALocA(0)', 'MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'CLR_TP', 'CLR_FN',
'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag', 'sMOTA', 'IDF1', 'IDR', 'IDP', 'IDTP', 'IDFN', 'IDFP',
'Dets', 'GT_Dets', 'IDs', 'GT_IDs']
default_ordered_dict = OrderedDict(zip(default_order, [None for _ in default_order]))
for f, v in zip(fields, values):
default_ordered_dict[f] = v
for df in default_order:
if default_ordered_dict[df] is None:
del default_ordered_dict[df]
fields = list(default_ordered_dict.keys())
values = list(default_ordered_dict.values())
out_file = os.path.join(output_folder, cls + '_summary.txt')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f, delimiter=' ')
writer.writerow(fields)
writer.writerow(values)
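# The resulting <cls>_summary.txt is a two-line, space-delimited file: the first line holds
# the (reordered) field names and the second their values, e.g. (illustrative numbers only)
#   HOTA DetA AssA
#   45.2 50.1 41.3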
def write_detailed_results(details, cls, output_folder):
"""Write detailed results to file"""
sequences = details[0].keys()
fields = ['seq'] + sum([list(s['COMBINED_SEQ'].keys()) for s in details], [])
out_file = os.path.join(output_folder, cls + '_detailed.csv')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f)
writer.writerow(fields)
for seq in sorted(sequences):
if seq == 'COMBINED_SEQ':
continue
writer.writerow([seq] + sum([list(s[seq].values()) for s in details], []))
writer.writerow(['COMBINED'] + sum([list(s['COMBINED_SEQ'].values()) for s in details], []))
def load_detail(file):
"""Loads detailed data for a tracker."""
data = {}
with open(file) as f:
for i, row_text in enumerate(f):
row = row_text.replace('\r', '').replace('\n', '').split(',')
if i == 0:
keys = row[1:]
continue
current_values = row[1:]
seq = row[0]
if seq == 'COMBINED':
seq = 'COMBINED_SEQ'
if (len(current_values) == len(keys)) and seq != '':
data[seq] = {}
for key, value in zip(keys, current_values):
data[seq][key] = float(value)
return data
class TrackEvalException(Exception):
"""Custom exception for catching expected errors."""
...

View File

@@ -0,0 +1,36 @@
"""
set gpus and random seeds
"""
import os
import random
import numpy as np
from loguru import logger
import torch
import torch.backends.cudnn as cudnn
def select_device(device):
""" set device
Args:
device: str, 'cpu' or '0' or '1,2,3'-like
Return:
torch.device
"""
if device == 'cpu':
logger.info('Use CPU for training')
elif ',' in device: # multi-gpu
logger.error('Multi-GPU currently not supported')
else:
logger.info(f'set gpu {device}')
os.environ['CUDA_VISIBLE_DEVICES'] = device
assert torch.cuda.is_available()
cuda = device != 'cpu' and torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
return device
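# Usage sketch: select_device('0') pins the process to GPU 0 via CUDA_VISIBLE_DEVICES and
# returns torch.device('cuda:0'), while select_device('cpu') returns torch.device('cpu').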

View File

@@ -0,0 +1,26 @@
import numpy as np
import cv2
import os
def save_results(folder_name, seq_name, results, data_type='default'):
"""
Write tracking results to a txt file under ./track_results/<folder_name>/<seq_name>.txt.
results: list of per-frame tuples (frame id, target ids, tlwh boxes, classes, scores)
data_type: 'default' | 'mot_challenge', output data format (default or MOT submission)
"""
assert len(results)
if not os.path.exists(f'./track_results/{folder_name}'):
os.makedirs(f'./track_results/{folder_name}')
with open(os.path.join('./track_results', folder_name, seq_name + '.txt'), 'w') as f:
for frame_id, target_ids, tlwhs, clses, scores in results:
for id, tlwh, score in zip(target_ids, tlwhs, scores):
f.write(f'{frame_id},{id},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{score:.2f},-1,-1,-1\n')
return folder_name
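# Each written row follows the MOT-style layout frame,id,x,y,w,h,score,-1,-1,-1,
# e.g. (illustrative) 1,3,102.50,44.00,35.00,80.00,0.91,-1,-1,-1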

View File

@@ -0,0 +1,64 @@
import cv2
import os
import numpy as np
from PIL import Image
def plot_img(img, frame_id, results, save_dir):
"""
img: np.ndarray: (H, W, C)
frame_id: int
results: [tlwhs, ids, clses]
save_dir: str
plot one frame of a sequence with its bounding boxes drawn
"""
if not os.path.exists(save_dir):
os.makedirs(save_dir)
assert img is not None
if len(img.shape) > 3:
img = img.squeeze(0)
img_ = np.ascontiguousarray(np.copy(img))
tlwhs, ids, clses = results[0], results[1], results[2]
for tlwh, id, cls in zip(tlwhs, ids, clses):
# convert tlwh to tlbr
tlbr = tuple([int(tlwh[0]), int(tlwh[1]), int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])])
# draw a rect
cv2.rectangle(img_, tlbr[:2], tlbr[2:], get_color(id), thickness=3, )
# note the id and cls
text = f'{int(cls)}_{id}'
cv2.putText(img_, text, (tlbr[0], tlbr[1]), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1,
color=(255, 164, 0), thickness=2)
cv2.imwrite(filename=os.path.join(save_dir, f'{frame_id:05d}.jpg'), img=img_)
def get_color(idx):
"""
aux func for plot_seq
get a unique color for each id
"""
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def save_video(images_path):
"""
save images (frames) to a video
"""
images_list = sorted(os.listdir(images_path))
save_video_path = os.path.join(images_path, images_path.split('/')[-1] + '.mp4')
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
img0 = Image.open(os.path.join(images_path, images_list[0]))
vw = cv2.VideoWriter(save_video_path, fourcc, 15, img0.size)
for image_name in images_list:
image = cv2.imread(filename=os.path.join(images_path, image_name))
vw.write(image)
vw.release()

View File

@@ -0,0 +1,16 @@
from utils.general import non_max_suppression, scale_coords
def postprocess(out, conf_thresh, nms_thresh, img_size, ori_img_size):
"""
Args:
out: raw output from the YOLOv7-style model
conf_thresh, nms_thresh: confidence and NMS IoU thresholds
img_size, ori_img_size: network input size and original image size, used to rescale the boxes
"""
out = out[0]
out = non_max_suppression(out, conf_thresh, nms_thresh, )[0]
out[:, :4] = scale_coords(img_size, out[:, :4], ori_img_size, ratio_pad=None).round()
# out: tlbr, conf, cls
return out
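# Usage sketch (assuming a YOLOv7-style forward pass whose first element holds the raw predictions):
#   preds = postprocess(model(img_tensor), 0.25, 0.45, img_tensor.shape[2:], ori_img.shape[:2])
# each row of preds is then (x1, y1, x2, y2, confidence, class) in original-image coordinates.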

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/airmot/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt
nc: 2
names: ['plane', 'ship']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/uavdt/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt
nc: 1
names: ['car']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt
nc: 5
names: ['pedestrain', 'car', 'van', 'truck', 'bus']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt
nc: 5
names: ['pedestrain', 'car', 'van', 'truck', 'bus']

View File

@@ -0,0 +1,6 @@
from ultralytics import YOLO
def postprocess(out):
out = out[0].boxes
return out.data
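# Usage sketch (ultralytics API): for results = YOLO('yolov8s.pt')(frame), postprocess(results)
# returns results[0].boxes.data, whose rows are (x1, y1, x2, y2, confidence, class).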

View File

@@ -0,0 +1,36 @@
import torch
from ultralytics import YOLO
import numpy as np
import argparse
def main(args):
""" main func
"""
model = YOLO(model=args.model_weight)
model.train(
data=args.data_cfg,
epochs=args.epochs,
batch=args.batch_size,
imgsz=args.img_sz,
patience=50, # epochs to wait for no observable improvement for early stopping of training
device=args.device,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser("YOLO v8 train parser")
parser.add_argument('--model', type=str, default='yolov8s.yaml', help='yaml or pt file')
parser.add_argument('--model_weight', type=str, default='yolov8s.pt', help='')
parser.add_argument('--data_cfg', type=str, default='yolov8_utils/data_cfgs/visdrone.yaml', help='')
parser.add_argument('--epochs', type=int, default=30, help='')
parser.add_argument('--batch_size', type=int, default=8, help='')
parser.add_argument('--img_sz', type=int, default=1280, help='')
parser.add_argument('--device', type=str, default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
args = parser.parse_args()
main(args)
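# Example invocation (script name assumed):
#   python yolov8_train.py --model_weight yolov8s.pt --data_cfg yolov8_utils/data_cfgs/visdrone.yaml --epochs 30 --device 0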

View File

@@ -0,0 +1,155 @@
import cv2
import numpy as np
from pycocotools.coco import COCO
import os
from yolox.data.datasets import Dataset
class MOTDataset(Dataset):
"""
COCO dataset class.
"""
def __init__(
self,
data_dir=None,
json_file="train_half.json",
name="train",
img_size=(608, 1088),
preproc=None,
):
"""
COCO dataset initialization. Annotation data are read into memory by COCO API.
Args:
data_dir (str): dataset root directory
json_file (str): COCO json file name
name (str): COCO data name (e.g. 'train2017' or 'val2017')
img_size (tuple): target image size (h, w) after pre-processing
preproc: data augmentation strategy
"""
super().__init__(img_size)
self.data_dir = data_dir
self.json_file = json_file
self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
self.ids = self.coco.getImgIds()
self.class_ids = sorted(self.coco.getCatIds())
cats = self.coco.loadCats(self.coco.getCatIds())
self._classes = tuple([c["name"] for c in cats])
self.annotations = self._load_coco_annotations()
self.name = name
self.img_size = img_size
self.preproc = preproc
def __len__(self):
return len(self.ids)
def _load_coco_annotations(self):
return [self.load_anno_from_ids(_ids) for _ids in self.ids]
def load_anno_from_ids(self, id_):
im_ann = self.coco.loadImgs(id_)[0]
width = im_ann["width"]
height = im_ann["height"]
frame_id = im_ann["frame_id"]
video_id = im_ann["video_id"]
anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
annotations = self.coco.loadAnns(anno_ids)
objs = []
for obj in annotations:
x1 = obj["bbox"][0]
y1 = obj["bbox"][1]
x2 = x1 + obj["bbox"][2]
y2 = y1 + obj["bbox"][3]
if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
obj["clean_bbox"] = [x1, y1, x2, y2]
objs.append(obj)
num_objs = len(objs)
res = np.zeros((num_objs, 6))
for ix, obj in enumerate(objs):
cls = self.class_ids.index(obj["category_id"])
res[ix, 0:4] = obj["clean_bbox"]
res[ix, 4] = cls
res[ix, 5] = obj["track_id"]
file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
img_info = (height, width, frame_id, video_id, file_name)
del im_ann, annotations
return (res, img_info, file_name)
def load_anno(self, index):
return self.annotations[index][0]
def pull_item(self, index):
id_ = self.ids[index]
res, img_info, file_name = self.annotations[index]
# load image and preprocess
img_file = os.path.join(
self.data_dir, 'images', self.name, file_name
)
# for debug
# print(f"************{img_file}************")
# exit()
img = cv2.imread(img_file)
assert img is not None
return img, res.copy(), img_info, np.array([id_])
@Dataset.resize_getitem
def __getitem__(self, index):
"""
One image / label pair for the given index is picked up and pre-processed.
Args:
index (int): data index
Returns:
img (numpy.ndarray): pre-processed image
padded_labels (torch.Tensor): pre-processed label data.
The shape is :math:`[max_labels, 5]`.
each label consists of [class, xc, yc, w, h]:
class (float): class index.
xc, yc (float) : center of bbox whose values range from 0 to 1.
w, h (float) : size of bbox whose values range from 0 to 1.
info_img : tuple of h, w, nh, nw, dx, dy.
h, w (int): original shape of the image
nh, nw (int): shape of the resized image without padding
dx, dy (int): pad size
img_id (int): same as the input index. Used for evaluation.
"""
img, target, img_info, img_id = self.pull_item(index)
if self.preproc is not None:
img, target = self.preproc(img, target, self.input_dim)
return img, target, img_info, img_id
class VisDroneDataset(MOTDataset):
def __init__(self, data_dir=None, json_file="train_half.json", name="train", img_size=(608, 1088), preproc=None):
super().__init__(data_dir, json_file, name, img_size, preproc)
self.DATA_ROOT = '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
self.VisD_dict = {'train':'VisDrone2019-MOT-train',
'test':'VisDrone2019-MOT-test-dev'}
def pull_item(self, index):
id_ = self.ids[index]
res, img_info, file_name = self.annotations[index]
# load image and preprocess
# img_file = os.path.join(
# self.data_dir, self.name, file_name
# )
img_file = os.path.join(
self.DATA_ROOT, self.VisD_dict[self.name], 'sequences', file_name
)
img = cv2.imread(img_file)
assert img is not None
return img, res.copy(), img_info, np.array([id_])

View File

@@ -0,0 +1,29 @@
import torch
from yolox.utils import postprocess
def postprocess_yolox(out, num_classes, conf_thresh, img, ori_img):
"""
convert out to -> (tlbr, conf, cls)
"""
out = postprocess(out, num_classes, conf_thresh, )[0] # (tlbr, obj_conf, cls_conf, cls)
if out is None: return out
# merge conf
out[:, 4] *= out[:, 5]
out[:, 5] = out[:, -1]
out = out[:, :-1]
# scale to origin size
img_size = [img.shape[-2], img.shape[-1]] # h, w
ori_img_size = [ori_img.shape[0], ori_img.shape[1]] # h0, w0
img_h, img_w = img_size[0], img_size[1]
scale = min(float(img_h) / ori_img_size[0], float(img_w) / ori_img_size[1])
out[:, :4] /= scale
return out
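# Usage sketch: given the raw YOLOX head output, the letterboxed input tensor img and the
# original frame ori_img, the returned rows are (x1, y1, x2, y2, score, class) rescaled back
# to the original resolution, where score is objectness times class confidence.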

View File

@@ -0,0 +1,122 @@
from loguru import logger
import torch
import torch.backends.cudnn as cudnn
from yolox.core import Trainer, launch
from yolox.exp import get_exp
import argparse
import random
import warnings
def make_parser():
parser = argparse.ArgumentParser("YOLOX train parser")
parser.add_argument("-expn", "--experiment-name", type=str, default=None)
parser.add_argument("-n", "--name", type=str, default=None, help="model name")
# distributed
parser.add_argument(
"--dist-backend", default="nccl", type=str, help="distributed backend"
)
parser.add_argument(
"--dist-url",
default=None,
type=str,
help="url used to set up distributed training",
)
parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
parser.add_argument(
"-d", "--devices", default=None, type=int, help="device for training"
)
parser.add_argument(
"--local_rank", default=0, type=int, help="local rank for dist training"
)
parser.add_argument(
"-f",
"--exp_file",
default=None,
type=str,
help="plz input your expriment description file",
)
parser.add_argument(
"--resume", default=False, action="store_true", help="resume training"
)
parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
parser.add_argument(
"-e",
"--start_epoch",
default=None,
type=int,
help="resume training start epoch",
)
parser.add_argument(
"--num_machines", default=1, type=int, help="num of node for training"
)
parser.add_argument(
"--machine_rank", default=0, type=int, help="node rank for multi-node training"
)
parser.add_argument(
"--fp16",
dest="fp16",
default=True,
action="store_true",
help="Adopting mix precision training.",
)
parser.add_argument(
"-o",
"--occupy",
dest="occupy",
default=False,
action="store_true",
help="occupy GPU memory first for training.",
)
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
return parser
@logger.catch
def main(exp, args):
if exp.seed is not None:
random.seed(exp.seed)
torch.manual_seed(exp.seed)
cudnn.deterministic = True
warnings.warn(
"You have chosen to seed training. This will turn on the CUDNN deterministic setting, "
"which can slow down your training considerably! You may see unexpected behavior "
"when restarting from checkpoints."
)
# set environment variables for distributed training
cudnn.benchmark = True
trainer = Trainer(exp, args)
trainer.train()
if __name__ == "__main__":
args = make_parser().parse_args()
exp = get_exp(args.exp_file, args.name)
exp.merge(args.opts)
if not args.experiment_name:
args.experiment_name = exp.exp_name
num_gpu = torch.cuda.device_count() if args.devices is None else args.devices
assert num_gpu <= torch.cuda.device_count()
launch(
main,
num_gpu,
args.num_machines,
args.machine_rank,
backend=args.dist_backend,
dist_url=args.dist_url,
args=(exp, args),
)

View File

@@ -0,0 +1,144 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist
from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir
class Exp(MyExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1 # 1 for uavdt mot17
self.depth = 0.67
self.width = 0.75
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_ann = "train.json"
self.val_ann = "test.json"
self.input_size = (800, 1440)
self.test_size = (800, 1440)
self.random_size = (18, 32)
self.max_epoch = 80
self.print_interval = 20
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
self.warmup_epochs = 1
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)
from mot_dataset import MOTDataset
dataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
# data_dir='/data/wujiapeng/datasets/UAVDT',
data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
json_file=self.train_ann,
name='train',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=500,
),
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1000,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
def get_eval_loader(self, batch_size, is_distributed, testdev=False):
from yolox.data import ValTransform
from mot_dataset import MOTDataset
valdataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
# data_dir='/data/wujiapeng/datasets/UAVDT',
data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
json_file=self.val_ann,
img_size=self.test_size,
name='test',
preproc=ValTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator

View File

@@ -0,0 +1,142 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist
from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir
class Exp(MyExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1
self.depth = 1.33
self.width = 1.25
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_ann = "train.json"
self.val_ann = "test.json"
self.input_size = (800, 1440)
self.test_size = (800, 1440)
self.random_size = (18, 32)
self.max_epoch = 80
self.print_interval = 20
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
self.warmup_epochs = 1
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)
from mot_dataset import MOTDataset
dataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir='/data/wujiapeng/datasets/UAVDT',
json_file=self.train_ann,
name='train',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=500,
),
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1000,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
def get_eval_loader(self, batch_size, is_distributed, testdev=False):
from yolox.data import ValTransform
from mot_dataset import MOTDataset
valdataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir='/data/wujiapeng/datasets/UAVDT',
json_file=self.val_ann,
img_size=self.test_size,
name='test',
preproc=ValTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator