init test

√(noham)²
2024-07-18 01:45:03 +02:00
parent 69dc05552e
commit 3e95611523
181 changed files with 34510 additions and 1 deletions

View File

@@ -0,0 +1,32 @@
# Config file of MOT17 dataset
DATASET_ROOT: '/data/wujiapeng/datasets/MOT17' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
- 'pedestrian'
CATEGORY_DICT:
0: 'pedestrian'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: '' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/MOT17/train'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'MOT17-02-SDP': null
'MOT17-04-SDP': null
'MOT17-05-SDP': null
'MOT17-09-SDP': null
'MOT17-10-SDP': null
'MOT17-11-SDP': null
'MOT17-13-SDP': null
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'

View File

@@ -0,0 +1,26 @@
# Config file of UAVDT dataset
DATASET_ROOT: '/data/wujiapeng/datasets/UAVDT' # your dataset root
SPLIT: test
CATEGORY_NAMES: # category names to show
- 'car'
CATEGORY_DICT:
0: 'car'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/UAVDT.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/UAVDT/UAV-benchmark-M'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'M0101': 407
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt'

View File

@@ -0,0 +1,61 @@
# Config file of VisDrone dataset
DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
- 'pedestrian'
- 'people'
- 'bicycle'
- 'car'
- 'van'
- 'truck'
- 'tricycle'
- 'awning-tricycle'
- 'bus'
- 'motor'
CATEGORY_DICT:
0: 'pedestrian'
1: 'people'
2: 'bicycle'
3: 'car'
4: 'van'
5: 'truck'
6: 'tricycle'
7: 'awning-tricycle'
8: 'bus'
9: 'motor'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/Visdrone_all.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'uav0000009_03358_v': 219
'uav0000073_00600_v': 328
'uav0000073_04464_v': 312
'uav0000077_00720_v': 780
'uav0000088_00290_v': 296
'uav0000119_02301_v': 179
'uav0000120_04775_v': 1000
'uav0000161_00000_v': 308
'uav0000188_00000_v': 260
'uav0000201_00000_v': 677
'uav0000249_00001_v': 360
'uav0000249_02688_v': 244
'uav0000297_00000_v': 146
'uav0000297_02761_v': 373
'uav0000306_00230_v': 420
'uav0000355_00001_v': 468
'uav0000370_00001_v': 265
'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'

View File

@@ -0,0 +1,51 @@
# Config file of VisDrone dataset
DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
SPLIT: test
CATEGORY_NAMES:
- 'pedestrian'
- 'car'
- 'van'
- 'truck'
- 'bus'
CATEGORY_DICT:
0: 'pedestrian'
1: 'car'
2: 'van'
3: 'truck'
4: 'bus'
CERTAIN_SEQS:
-
IGNORE_SEQS: # Seqs you want to ignore
-
YAML_DICT: './data/Visdrone_all.yaml' # NOTE: only used by the YOLOv5 model loader (func DetectMultiBackend)
TRACK_EVAL: # settings used when evaluating with TrackEval
'DISPLAY_LESS_PROGRESS': False
'GT_FOLDER': '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019/VisDrone2019-MOT-test-dev/annotations'
'TRACKERS_FOLDER': './tracker/results'
'SKIP_SPLIT_FOL': True
'TRACKER_SUB_FOLDER': ''
'SEQ_INFO':
'uav0000009_03358_v': 219
'uav0000073_00600_v': 328
'uav0000073_04464_v': 312
'uav0000077_00720_v': 780
'uav0000088_00290_v': 296
'uav0000119_02301_v': 179
'uav0000120_04775_v': 1000
'uav0000161_00000_v': 308
'uav0000188_00000_v': 260
'uav0000201_00000_v': 677
'uav0000249_00001_v': 360
'uav0000249_02688_v': 244
'uav0000297_00000_v': 146
'uav0000297_02761_v': 373
'uav0000306_00230_v': 420
'uav0000355_00001_v': 468
'uav0000370_00001_v': 265
'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt'

View File

@@ -0,0 +1,37 @@
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
def tic(self):
# use time.time instead of time.clock because time.clock
# does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
self.duration = self.average_time
else:
self.duration = self.diff
return self.duration
def clear(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
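A minimal usage sketch of the Timer class above (the my_timer module name is taken from the import in the tracking script below; values are illustrative):

import time
from my_timer import Timer

timer = Timer()
for _ in range(5):
    timer.tic()
    time.sleep(0.01)               # stand-in for per-frame detection + tracking
    timer.toc()                    # average=True, so duration holds the running mean
print(f'{timer.calls} calls, average {timer.average_time:.4f}s each')
timer.clear()                      # reset before timing the next sequence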

View File

@@ -0,0 +1,305 @@
"""
Main script for tracking on a dataset split.
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml
from loguru import logger
import argparse
from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video
from my_timer import Timer
from tracker_dataloader import TestDataset
# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker
from trackers.strongsort_tracker import StrongSortTracker
from trackers.sparse_tracker import SparseTracker
# YOLOX modules
try:
from yolox.exp import get_exp
from yolox_utils.postprocess import postprocess_yolox
from yolox.utils import fuse_model
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
pass
# YOLOv7 modules
try:
sys.path.append(os.getcwd())
from models.experimental import attempt_load
from utils.torch_utils import select_device, time_synchronized, TracedModel
from utils.general import non_max_suppression, scale_coords, check_img_size
from yolov7_utils.postprocess import postprocess as postprocess_yolov7
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
pass
# YOLOv8 modules
try:
from ultralytics import YOLO
from yolov8_utils.postprocess import postprocess as postprocess_yolov8
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
pass
TRACKER_DICT = {
'sort': SortTracker,
'bytetrack': ByteTracker,
'botsort': BotTracker,
'c_bioutrack': C_BIoUTracker,
'ocsort': OCSortTracker,
'deepsort': DeepSortTracker,
'strongsort': StrongSortTracker,
'sparsetrack': SparseTracker
}
def get_args():
parser = argparse.ArgumentParser()
"""general"""
parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.')
parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')
parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')
parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter detections')
parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS')
parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks')
parser.add_argument('--device', type=str, default='6', help='cuda device, e.g. 0 or 0,1,2,3 or cpu')
"""yolox"""
parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')
"""model path"""
parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLO v7')
# other model path
parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path to the ReID model weights')
parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path to the DHN weights for DeepMOT')
"""other options"""
parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only affects trackers such as BoT-SORT that use a ReID branch')
parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance dist')
parser.add_argument('--min_area', type=float, default=150, help='used to filter out small bboxes')
parser.add_argument('--save_dir', type=str, default='track_results/{dataset_name}/{split}')
parser.add_argument('--save_images', action='store_true', help='save tracking results (image)')
parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')
parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate')
return parser.parse_args()
def main(args, dataset_cfgs):
"""1. set some params"""
# NOTE: saving videos requires saving images first
if args.save_videos:
args.save_images = True
"""2. load detector"""
device = select_device(args.device)
if args.detector == 'yolox':
exp = get_exp(args.yolox_exp_file, None) # TODO: modify num_classes etc. for specific dataset
model_img_size = exp.input_size
model = exp.get_model()
model.to(device)
model.eval()
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
ckpt = torch.load(args.detector_model_path, map_location=device)
model.load_state_dict(ckpt['model'])
logger.info("loaded checkpoint done")
model = fuse_model(model)
stride = None # match with yolo v7
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov7':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = attempt_load(args.detector_model_path, map_location=device)
# get inference img size
stride = int(model.stride.max()) # model stride
model_img_size = check_img_size(args.img_size, s=stride) # check img_size
# Traced model
model = TracedModel(model, device=device, img_size=args.img_size)
# model.half()
logger.info("loaded checkpoint done")
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov8':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = YOLO(args.detector_model_path)
model_img_size = [None, None]
stride = None
logger.info("loaded checkpoint done")
else:
logger.error(f"detector {args.detector} is not supprted")
exit(0)
"""3. load sequences"""
DATA_ROOT = dataset_cfgs['DATASET_ROOT']
SPLIT = dataset_cfgs['SPLIT']
seqs = sorted(os.listdir(os.path.join(DATA_ROOT, 'images', SPLIT)))
seqs = [seq for seq in seqs if seq not in dataset_cfgs['IGNORE_SEQS']]
if None not in dataset_cfgs['CERTAIN_SEQS']:
seqs = dataset_cfgs['CERTAIN_SEQS']
logger.info(f'Total {len(seqs)} seqs will be tracked: {seqs}')
save_dir = args.save_dir.format(dataset_name=args.dataset, split=SPLIT)
"""4. Tracking"""
# set timer
timer = Timer()
seq_fps = []
for seq in seqs:
logger.info(f'--------------tracking seq {seq}--------------')
dataset = TestDataset(DATA_ROOT, SPLIT, seq_name=seq, img_size=model_img_size, model=args.detector, stride=stride)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
tracker = TRACKER_DICT[args.tracker](args, )
process_bar = enumerate(data_loader)
process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)
results = []
for frame_idx, (ori_img, img) in process_bar:
# start timing this frame
timer.tic()
if args.detector == 'yolov8':
img = img.squeeze(0).cpu().numpy()
else:
img = img.to(device) # (1, C, H, W)
img = img.float()
ori_img = ori_img.squeeze(0)
# get detector output
with torch.no_grad():
if args.detector == 'yolov8':
output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
else:
output = model(img)
# postprocess output to original scales
if args.detector == 'yolox':
output = postprocess_yolox(output, len(dataset_cfgs['CATEGORY_NAMES']), conf_thresh=args.conf_thresh,
img=img, ori_img=ori_img)
elif args.detector == 'yolov7':
output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)
elif args.detector == 'yolov8':
output = postprocess_yolov8(output)
else: raise NotImplementedError
# output: (tlbr, conf, cls)
# convert tlbr to tlwh
if isinstance(output, torch.Tensor):
output = output.detach().cpu().numpy()
output[:, 2] -= output[:, 0]
output[:, 3] -= output[:, 1]
current_tracks = tracker.update(output, img, ori_img.cpu().numpy())
# save results
cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
for trk in current_tracks:
bbox = trk.tlwh
id = trk.track_id
cls = trk.category
score = trk.score
# filter low area bbox
if bbox[2] * bbox[3] > args.min_area:
cur_tlwh.append(bbox)
cur_id.append(id)
cur_cls.append(cls)
cur_score.append(score)
# results.append((frame_id + 1, id, bbox, cls))
results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))
timer.toc()
if args.save_images:
plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
save_dir=os.path.join(save_dir, 'vis_results'))
save_results(folder_name=os.path.join(args.dataset, SPLIT),
seq_name=seq,
results=results)
# show the fps
seq_fps.append(frame_idx / timer.total_time)
logger.info(f'fps of seq {seq}: {seq_fps[-1]}')
timer.clear()
if args.save_videos:
save_video(images_path=os.path.join(save_dir, 'vis_results'))
logger.info(f'save video of {seq} done')
# show the average fps
logger.info(f'average fps: {np.mean(seq_fps)}')
if __name__ == '__main__':
args = get_args()
with open(f'./tracker/config_files/{args.dataset}.yaml', 'r') as f:
cfgs = yaml.load(f, Loader=yaml.FullLoader)
main(args, cfgs)
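A condensed sketch of the wiring in main() above, assuming the repo modules and config files are importable; the dataset name, the dummy detections, and the SimpleNamespace stand-in for get_args() are illustrative assumptions:

from types import SimpleNamespace
import numpy as np
import yaml
from trackers.byte_tracker import ByteTracker

with open('./tracker/config_files/mot17.yaml', 'r') as f:   # dataset name assumed
    cfgs = yaml.safe_load(f)
print(cfgs['DATASET_ROOT'], cfgs['SPLIT'], list(cfgs['TRACK_EVAL']['SEQ_INFO']))

args = SimpleNamespace(conf_thresh=0.2, track_buffer=30, kalman_format='default')
tracker = ByteTracker(args)

dets = np.array([[100., 150., 40., 80., 0.90, 0.],          # (tlwh, conf, cls) per row
                 [400., 120., 35., 70., 0.15, 0.]])
online = tracker.update(dets, img=None, ori_img=None)       # ByteTracker ignores the image args
for trk in online:
    print(trk.track_id, trk.tlwh, trk.category, trk.score)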

View File

@@ -0,0 +1,266 @@
"""
main code for track
"""
import sys, os
import numpy as np
import torch
import cv2
from PIL import Image
from tqdm import tqdm
import yaml
from loguru import logger
import argparse
from tracking_utils.envs import select_device
from tracking_utils.tools import *
from tracking_utils.visualization import plot_img, save_video
from tracker_dataloader import TestDataset, DemoDataset
# trackers
from trackers.byte_tracker import ByteTracker
from trackers.sort_tracker import SortTracker
from trackers.botsort_tracker import BotTracker
from trackers.c_biou_tracker import C_BIoUTracker
from trackers.ocsort_tracker import OCSortTracker
from trackers.deepsort_tracker import DeepSortTracker
# YOLOX modules
try:
from yolox.exp import get_exp
from yolox_utils.postprocess import postprocess_yolox
from yolox.utils import fuse_model
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOX. If you want to use YOLOX, please check the installation.')
pass
# YOLOv7 modules
try:
sys.path.append(os.getcwd())
from models.experimental import attempt_load
from utils.torch_utils import select_device, time_synchronized, TracedModel
from utils.general import non_max_suppression, scale_coords, check_img_size
from yolov7_utils.postprocess import postprocess as postprocess_yolov7
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv7. If you want to use YOLOv7, please check the installation.')
pass
# YOLOv8 modules
try:
from ultralytics import YOLO
from yolov8_utils.postprocess import postprocess as postprocess_yolov8
except Exception as e:
logger.warning(e)
logger.warning('Failed to load YOLOv8. If you want to use YOLOv8, please check the installation.')
pass
TRACKER_DICT = {
'sort': SortTracker,
'bytetrack': ByteTracker,
'botsort': BotTracker,
'c_bioutrack': C_BIoUTracker,
'ocsort': OCSortTracker,
'deepsort': DeepSortTracker
}
def get_args():
parser = argparse.ArgumentParser()
"""general"""
parser.add_argument('--obj', type=str, required=True, default='demo.mp4', help='video or images folder PATH')
parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.')
parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deepsort')
parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')
parser.add_argument('--conf_thresh', type=float, default=0.2, help='confidence threshold to filter detections')
parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS')
parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks')
parser.add_argument('--device', type=str, default='6', help='cuda device, e.g. 0 or 0,1,2,3 or cpu')
"""yolox"""
parser.add_argument('--num_classes', type=int, default=1)
parser.add_argument('--yolox_exp_file', type=str, default='./tracker/yolox_utils/yolox_m.py')
"""model path"""
parser.add_argument('--detector_model_path', type=str, default='./weights/best.pt', help='model path')
parser.add_argument('--trace', type=bool, default=False, help='traced model of YOLO v7')
# other model path
parser.add_argument('--reid_model_path', type=str, default='./weights/osnet_x0_25.pth', help='path to the ReID model weights')
parser.add_argument('--dhn_path', type=str, default='./weights/DHN.pth', help='path to the DHN weights for DeepMOT')
"""other options"""
parser.add_argument('--discard_reid', action='store_true', help='discard the ReID model; only affects trackers such as BoT-SORT that use a ReID branch')
parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and appearance dist')
parser.add_argument('--min_area', type=float, default=150, help='used to filter out small bboxes')
parser.add_argument('--save_dir', type=str, default='track_demo_results')
parser.add_argument('--save_images', action='store_true', help='save tracking results (image)')
parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)')
parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate')
return parser.parse_args()
def main(args):
"""1. set some params"""
# NOTE: saving videos requires saving images first
if args.save_videos:
args.save_images = True
"""2. load detector"""
device = select_device(args.device)
if args.detector == 'yolox':
exp = get_exp(args.yolox_exp_file, None) # TODO: modify num_classes etc. for specific dataset
model_img_size = exp.input_size
model = exp.get_model()
model.to(device)
model.eval()
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
ckpt = torch.load(args.detector_model_path, map_location=device)
model.load_state_dict(ckpt['model'])
logger.info("loaded checkpoint done")
model = fuse_model(model)
stride = None # match with yolo v7
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov7':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = attempt_load(args.detector_model_path, map_location=device)
# get inference img size
stride = int(model.stride.max()) # model stride
model_img_size = check_img_size(args.img_size, s=stride) # check img_size
# Traced model
model = TracedModel(model, device=device, img_size=args.img_size)
# model.half()
logger.info("loaded checkpoint done")
logger.info(f'Now detector is on device {next(model.parameters()).device}')
elif args.detector == 'yolov8':
logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}")
model = YOLO(args.detector_model_path)
model_img_size = [None, None]
stride = None
logger.info("loaded checkpoint done")
else:
logger.error(f"detector {args.detector} is not supprted")
exit(0)
"""3. load sequences"""
dataset = DemoDataset(file_name=args.obj, img_size=model_img_size, model=args.detector, stride=stride, )
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
tracker = TRACKER_DICT[args.tracker](args, )
save_dir = args.save_dir
process_bar = enumerate(data_loader)
process_bar = tqdm(process_bar, total=len(data_loader), ncols=150)
results = []
"""4. Tracking"""
for frame_idx, (ori_img, img) in process_bar:
if args.detector == 'yolov8':
img = img.squeeze(0).cpu().numpy()
else:
img = img.to(device) # (1, C, H, W)
img = img.float()
ori_img = ori_img.squeeze(0)
# get detector output
with torch.no_grad():
if args.detector == 'yolov8':
output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh)
else:
output = model(img)
# postprocess output to original scales
if args.detector == 'yolox':
output = postprocess_yolox(output, args.num_classes, conf_thresh=args.conf_thresh,
img=img, ori_img=ori_img)
elif args.detector == 'yolov7':
output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape)
elif args.detector == 'yolov8':
output = postprocess_yolov8(output)
else: raise NotImplementedError
# output: (tlbr, conf, cls)
# convert tlbr to tlwh
if isinstance(output, torch.Tensor):
output = output.detach().cpu().numpy()
output[:, 2] -= output[:, 0]
output[:, 3] -= output[:, 1]
current_tracks = tracker.update(output, img, ori_img.cpu().numpy())
# save results
cur_tlwh, cur_id, cur_cls, cur_score = [], [], [], []
for trk in current_tracks:
bbox = trk.tlwh
id = trk.track_id
cls = trk.category
score = trk.score
# filter low area bbox
if bbox[2] * bbox[3] > args.min_area:
cur_tlwh.append(bbox)
cur_id.append(id)
cur_cls.append(cls)
cur_score.append(score)
# results.append((frame_id + 1, id, bbox, cls))
results.append((frame_idx + 1, cur_id, cur_tlwh, cur_cls, cur_score))
if args.save_images:
plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls],
save_dir=os.path.join(save_dir, 'vis_results'))
save_results(folder_name=os.path.join(save_dir, 'txt_results'),
seq_name='demo',
results=results)
if args.save_videos:
save_video(images_path=os.path.join(save_dir, 'vis_results'))
logger.info(f'save video done')
if __name__ == '__main__':
args = get_args()
main(args)
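Both scripts convert the detector output from tlbr (x1, y1, x2, y2) to tlwh (x, y, w, h) in place before calling tracker.update; a worked check of that two-line conversion:

import numpy as np

out = np.array([[100., 150., 140., 230., 0.9, 0.]])   # x1, y1, x2, y2, conf, cls
out[:, 2] -= out[:, 0]                                 # w = x2 - x1 -> 40
out[:, 3] -= out[:, 1]                                 # h = y2 - y1 -> 80
print(out[0, :4])                                      # [100. 150.  40.  80.]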

View File

@@ -0,0 +1,223 @@
import numpy as np
import torch
import cv2
import os
import os.path as osp
from torch.utils.data import Dataset
class TestDataset(Dataset):
""" This class generate origin image, preprocessed image for inference
NOTE: for every sequence, initialize a TestDataset class
"""
def __init__(self, data_root, split, seq_name, img_size=[640, 640], legacy_yolox=True, model='yolox', **kwargs) -> None:
"""
Args:
data_root: path for entire dataset
seq_name: name of sequence
img_size: List[int, int] | Tuple[int, int] image size for detection model
legacy_yolox: bool, to be compatible with older versions of yolox
model: detection model; currently supports yolox, yolov7 and yolov8
"""
super().__init__()
self.model = model
self.data_root = data_root
self.seq_name = seq_name
self.img_size = img_size
self.split = split
self.seq_path = osp.join(self.data_root, 'images', self.split, self.seq_name)
self.imgs_in_seq = sorted(os.listdir(self.seq_path))
self.legacy = legacy_yolox
self.other_param = kwargs
def __getitem__(self, idx):
if self.model == 'yolox':
return self._getitem_yolox(idx)
elif self.model == 'yolov7':
return self._getitem_yolov7(idx)
elif self.model == 'yolov8':
return self._getitem_yolov8(idx)
def _getitem_yolox(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))
img_resized, _ = self._preprocess_yolox(img, self.img_size, )
if self.legacy:
img_resized = img_resized[::-1, :, :].copy() # BGR -> RGB
img_resized /= 255.0
img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
return torch.from_numpy(img), torch.from_numpy(img_resized)
def _getitem_yolov7(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx]))
img_resized = self._preprocess_yolov7(img, ) # torch.Tensor
return torch.from_numpy(img), img_resized
def _getitem_yolov8(self, idx):
img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx])) # (h, w, c)
# img = self._preprocess_yolov8(img)
return torch.from_numpy(img), torch.from_numpy(img)
def _preprocess_yolox(self, img, size, swap=(2, 0, 1)):
""" convert origin image to resized image, YOLOX-manner
Args:
img: np.ndarray
size: List[int, int] | Tuple[int, int]
swap: (H, W, C) -> (C, H, W)
Returns:
np.ndarray, float
"""
if len(img.shape) == 3:
padded_img = np.ones((size[0], size[1], 3), dtype=np.uint8) * 114
else:
padded_img = np.ones(size, dtype=np.uint8) * 114
r = min(size[0] / img.shape[0], size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
padded_img = padded_img.transpose(swap)
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
return padded_img, r
def _preprocess_yolov7(self, img, ):
img_resized = self._letterbox(img, new_shape=self.img_size, stride=self.other_param['stride'], )[0]
img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img_resized = np.ascontiguousarray(img_resized)
img_resized = torch.from_numpy(img_resized).float()
img_resized /= 255.0
return img_resized
def _preprocess_yolov8(self, img, ):
img = img.transpose((2, 0, 1))
img = np.ascontiguousarray(img)
return img
def _letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
def __len__(self, ):
return len(self.imgs_in_seq)
class DemoDataset(TestDataset):
"""
dataset for demo: runs on a single video file or an image folder
"""
def __init__(self, file_name, img_size=[640, 640], model='yolox', legacy_yolox=True, **kwargs) -> None:
self.file_name = file_name
self.model = model
self.img_size = img_size
self.is_video = '.mp4' in file_name or '.avi' in file_name
if not self.is_video:
self.imgs_in_seq = sorted(os.listdir(file_name))
else:
self.imgs_in_seq = []
self.cap = cv2.VideoCapture(file_name)
while True:
ret, frame = self.cap.read()
if not ret: break
self.imgs_in_seq.append(frame)
self.legacy = legacy_yolox
def __getitem__(self, idx):
if not self.is_video:
img = cv2.imread(osp.join(self.file_name, self.imgs_in_seq[idx]))
else:
img = self.imgs_in_seq[idx]
if self.model == 'yolox':
return self._getitem_yolox(img)
elif self.model == 'yolov7':
return self._getitem_yolov7(img)
elif self.model == 'yolov8':
return self._getitem_yolov8(img)
def _getitem_yolox(self, img):
img_resized, _ = self._preprocess_yolox(img, self.img_size, )
if self.legacy:
img_resized = img_resized[::-1, :, :].copy() # BGR -> RGB
img_resized /= 255.0
img_resized -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img_resized /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
return torch.from_numpy(img), torch.from_numpy(img_resized)
def _getitem_yolov7(self, img):
img_resized = self._preprocess_yolov7(img, ) # torch.Tensor
return torch.from_numpy(img), img_resized
def _getitem_yolov8(self, img):
# img = self._preprocess_yolov8(img)
return torch.from_numpy(img), torch.from_numpy(img)
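A numeric trace of _letterbox above with its defaults (new_shape=640, auto=True, stride=32) on a 1080x1920 frame; the arithmetic mirrors the function line by line:

import numpy as np

shape = (1080, 1920)                                        # (height, width) of the source frame
new_shape = (640, 640)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])   # 1/3
new_unpad = (round(shape[1] * r), round(shape[0] * r))      # (640, 360)
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]   # 0, 280
dw, dh = np.mod(dw, 32), np.mod(dh, 32)                     # 0, 24 with the minimum-rectangle option
print(new_unpad, dw / 2, dh / 2)                            # (640, 360) 0.0 12.0 -> padded to 384 x 640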

View File

@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id():
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@property
def xywh(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2.0
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
@staticmethod
def tlwh_to_xysa(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] = tlwh[2] * tlwh[3]
ret[3] = tlwh[2] / tlwh[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
def to_xywh(self):
return self.tlwh_to_xywh(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
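A numeric check of the static conversions above for tlwh = (10, 20, 30, 60); the import path is an assumption based on the package layout:

import numpy as np
from trackers.basetrack import BaseTrack

tlwh = np.array([10., 20., 30., 60.])
print(BaseTrack.tlwh_to_tlbr(tlwh))   # [10. 20. 40. 80.]   bottom-right corner
print(BaseTrack.tlwh_to_xywh(tlwh))   # [25. 50. 30. 60.]   center plus size
print(BaseTrack.tlwh_to_xyah(tlwh))   # [25. 50.  0.5 60.]  aspect ratio = w / h
print(BaseTrack.tlwh_to_xysa(tlwh))   # [25. 50. 1800. 0.5] area and w / h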

View File

@@ -0,0 +1,329 @@
"""
BoT-SORT tracker
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class BotTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (c, 128, 256)
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, 128, 128)
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128)) # shape: (128, 128, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the detected objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# if any(tlbr_ == -1 for tlbr_ in tlwh):
# print(tlwh)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
"""Step 1: Extract reid features"""
if self.with_reid:
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
if self.with_reid:
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
ious_dists = iou_distance(tracklet_pool, detections)
ious_dists_mask = (ious_dists > 0.5) # high conf iou
if self.with_reid:
# mixed cost matrix
emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
ious_dists = iou_distance(unconfirmed, detections)
ious_dists_mask = (ious_dists > 0.5)
if self.with_reid:
emb_dists = embedding_distance(unconfirmed, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
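Every tracker in this commit partitions detections into a high score set (first association) and a low score set (second association) with the masks used in update() above; a numeric check with conf_thresh = 0.5:

import numpy as np

scores = np.array([0.92, 0.47, 0.30, 0.08])
conf_thresh = 0.5
remain_inds = scores > conf_thresh                                  # first association
inds_second = np.logical_and(scores > 0.1, scores < conf_thresh)    # second association
print(scores[remain_inds], scores[inds_second])                     # [0.92] [0.47 0.3 ]
# detections below 0.1 (here 0.08) are dropped entirely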

View File

@@ -0,0 +1,201 @@
"""
ByteTrack
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class ByteTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
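A small check of the tracklet set helpers defined above; the import path is an assumption based on the package layout, and SimpleNamespace objects stand in for real tracklets:

from types import SimpleNamespace
from trackers.byte_tracker import joint_tracklets, sub_tracklets

a, b, c = (SimpleNamespace(track_id=i) for i in (1, 2, 3))
print([t.track_id for t in joint_tracklets([a, b], [b, c])])   # [1, 2, 3]  union keyed by track_id
print([t.track_id for t in sub_tracklets([a, b, c], [b])])     # [1, 3]     drop ids present in the second list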

View File

@@ -0,0 +1,204 @@
"""
C_BIoU Track
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *
class C_BIoUTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = buffered_iou_distance(tracklet_pool, detections, level=1)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = buffered_iou_distance(unconfirmed, detections, level=1)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt
"""GMC Module"""
class GMC:
def __init__(self, method='orb', downscale=2, verbose=None):
super(GMC, self).__init__()
self.method = method
self.downscale = max(1, int(downscale))
if self.method == 'orb':
self.detector = cv2.FastFeatureDetector_create(20)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
elif self.method == 'sift':
self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.matcher = cv2.BFMatcher(cv2.NORM_L2)
elif self.method == 'ecc':
number_of_iterations = 100
termination_eps = 1e-5
self.warp_mode = cv2.MOTION_EUCLIDEAN
self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
elif self.method == 'file' or self.method == 'files':
seqName = verbose[0]
ablation = verbose[1]
if ablation:
filePath = r'tracker/GMC_files/MOT17_ablation'
else:
filePath = r'tracker/GMC_files/MOTChallenge'
if '-FRCNN' in seqName:
seqName = seqName[:-6]
elif '-DPM' in seqName:
seqName = seqName[:-4]
elif '-SDP' in seqName:
seqName = seqName[:-4]
self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')
if self.gmcFile is None:
raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
elif self.method == 'none' or self.method == 'None':
self.method = 'none'
else:
raise ValueError("Error: Unknown CMC method:" + method)
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
if self.method == 'orb' or self.method == 'sift':
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
return self.applyEcc(raw_frame, detections)
elif self.method == 'file':
return self.applyFile(raw_frame, detections)
elif self.method == 'none':
return np.eye(2, 3)
else:
return np.eye(2, 3)
def applyEcc(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3, dtype=np.float32)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
# Initialization done
self.initializedFirstFrame = True
return H
# Run the ECC algorithm. The results are stored in warp_matrix.
# (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
try:
(cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
except:
print('Warning: findTransformECC failed; using the identity warp')
return H
def applyFeatures(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# find the keypoints
mask = np.zeros_like(frame)
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
if detections is not None:
for det in detections:
tlbr = (det[:4] / self.downscale).astype(np.int_)
mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
keypoints = self.detector.detect(frame, mask)
# compute the descriptors
keypoints, descriptors = self.extractor.compute(frame, keypoints)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
# Initialization done
self.initializedFirstFrame = True
return H
# Match descriptors.
knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
# Filter matches based on spatial distance
matches = []
spatialDistances = []
maxSpatialDistance = 0.25 * np.array([width, height])
# Handle empty matches case
if len(knnMatches) == 0:
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
for m, n in knnMatches:
if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
(np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
spatialDistances.append(spatialDistance)
matches.append(m)
meanSpatialDistances = np.mean(spatialDistances, 0)
stdSpatialDistances = np.std(spatialDistances, 0)
inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Draw the keypoint matches on the output image (disabled debug visualization; set to 1 to enable)
if 0:
matches_img = np.hstack((self.prevFrame, frame))
matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
W = np.size(self.prevFrame, 1)
for m in goodMatches:
prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
curr_pt[0] += W
color = np.random.randint(0, 255, (3,))
color = (int(color[0]), int(color[1]), int(color[2]))
matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
plt.figure()
plt.imshow(matches_img)
plt.show()
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
print('Warning: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
def applyFile(self, raw_frame, detections=None):
line = self.gmcFile.readline()
tokens = line.split("\t")
H = np.eye(2, 3, dtype=np.float_)
H[0, 0] = float(tokens[1])
H[0, 1] = float(tokens[2])
H[0, 2] = float(tokens[3])
H[1, 0] = float(tokens[4])
H[1, 1] = float(tokens[5])
H[1, 2] = float(tokens[6])
return H
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""
GMC module prediction
:param stracks: List[Strack]
"""
if len(stracks) > 0:
multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].kalman_filter.kf.x = mean
stracks[i].kalman_filter.kf.P = cov
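# --- Usage sketch (illustrative, not part of the original file) ---
# Assuming the enclosing class is named GMC and `video_frames` / `tracker` are
# placeholders from the surrounding tracking loop, camera motion compensation
# is typically applied once per frame and then propagated to all Kalman states:
#
# gmc = GMC(method='orb', downscale=2)
# for frame in video_frames:
# H = gmc.apply(frame) # 2x3 affine warp from previous to current frame
# GMC.multi_gmc(tracker.tracked_stracks, H) # warp track means/covariances in place
# GMC.multi_gmc(tracker.lost_stracks, H)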

View File

@@ -0,0 +1,327 @@
"""
Deep Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
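# --- Usage sketch (illustrative) ---
# Loading an OSNet appearance model; the checkpoint path below is a placeholder
# and must point to an existing weight file:
#
# reid = load_reid_model('osnet_x0_25', '/path/to/osnet_x0_25.pth')
# feats = reid(img_batch) # (N, feature_dim) embeddings, model runs on CUDA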
class DeepSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # -> (c, h, w), values in [0, 1]
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, h, w), where (w, h) = self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
matching_thresh=0.9,
cascade_depth=30,
tracks=tracklet_pool,
detections=detections
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get cost matrix: first compute the appearance cost, then gate it by the Kalman state
tracks: List[STrack]
dets: List[STrack]
"""
appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by the Kalman state (Mahalanobis) distance, constrained by
the 0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as state vector or only use [xc, yc]
return:
updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy
class BaseKalman:
def __init__(self,
state_dim: int = 8,
observation_dim: int = 4,
F: np.ndarray = np.zeros((0, )),
P: np.ndarray = np.zeros((0, )),
Q: np.ndarray = np.zeros((0, )),
H: np.ndarray = np.zeros((0, )),
R: np.ndarray = np.zeros((0, )),
) -> None:
self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
if F.shape[0] > 0: self.kf.F = F # if valid
if P.shape[0] > 0: self.kf.P = P
if Q.shape[0] > 0: self.kf.Q = Q
if H.shape[0] > 0: self.kf.H = H
if R.shape[0] > 0: self.kf.R = R
def initialize(self, observation):
raise NotImplementedError
def predict(self, ):
self.kf.predict()
def update(self, observation, **kwargs):
self.kf.update(observation, **kwargs)
def get_state(self, ):
return self.kf.x
def gating_distance(self, measurements, only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
measurements : ndarray
An Nx4 dimensional matrix of N measurements, note the format (whether xywh or xyah or others)
should be identical to state definition
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
# map state space to measurement space
mean = self.kf.x.copy()
mean = np.dot(self.kf.H, mean)
covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
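# --- Worked example (illustrative) ---
# With the full 4-dof measurement, the 0.95 chi-square threshold is
# chi2inv95[4] = 9.4877 (see matching.py); associations whose squared
# Mahalanobis distance exceeds it are treated as infeasible:
#
# kf = SomeKalman() # hypothetical concrete subclass of BaseKalman
# kf.initialize(np.array([10., 20., 0.5, 50.]))
# d2 = kf.gating_distance(measurements) # shape (N,)
# feasible = d2 <= 9.4877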

View File

@@ -0,0 +1,99 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
import cv2
class BotKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, w, h, vx, vy, vw, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-w-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[2], # related to h
2 * self._std_weight_position * observation[3],
2 * self._std_weight_position * observation[2],
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-a-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)
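# --- Usage sketch (illustrative) ---
# One predict/update cycle with x-y-w-h observations:
#
# kf = BotKalman()
# kf.initialize(np.array([100., 50., 30., 60.])) # x, y, w, h
# kf.predict() # x_{1, 0}, P_{1, 0}
# kf.update(np.array([102., 51., 31., 59.])) # fold in the new detection
# state = kf.get_state() # 8-dim [x, y, w, h, vx, vy, vw, vh]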

View File

@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np
class ByteKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-a-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,144 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class OCSORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def _freeze(self, ):
""" freeze all the param of Kalman
"""
self.attr_saved = deepcopy(self.kf.__dict__)
def _unfreeze(self, ):
""" when observe an lost object again, use the virtual trajectory
"""
if self.attr_saved is not None:
new_history = deepcopy(self.history_obs)
self.kf.__dict__ = self.attr_saved
self.history_obs = self.history_obs[:-1]
occur = [int(d is None) for d in new_history]
indices = np.where(np.array(occur)==0)[0]
index1 = indices[-2]
index2 = indices[-1]
box1 = new_history[index1]
x1, y1, s1, r1 = box1
w1 = np.sqrt(s1 * r1)
h1 = np.sqrt(s1 / r1)
box2 = new_history[index2]
x2, y2, s2, r2 = box2
w2 = np.sqrt(s2 * r2)
h2 = np.sqrt(s2 / r2)
time_gap = index2 - index1
dx = (x2-x1)/time_gap
dy = (y2-y1)/time_gap
dw = (w2-w1)/time_gap
dh = (h2-h1)/time_gap
for i in range(index2 - index1):
"""
The default virtual trajectory generation is by linear
motion (constant speed hypothesis), you could modify this
part to implement your own.
"""
x = x1 + (i+1) * dx
y = y1 + (i+1) * dy
w = w1 + (i+1) * dw
h = h1 + (i+1) * dh
s = w * h
r = w / float(h)
new_box = np.array([x, y, s, r]).reshape((4, 1))
"""
I still use predict-update loop here to refresh the parameters,
but this can be faster by directly modifying the internal parameters
as suggested in the paper. I keep this naive but slow way for
readability and ease of understanding
"""
self.kf.update(new_box)
if not i == (index2-index1-1):
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly, since OC-SORT changes the internals of the Kalman filter
Args:
z: observation x-y-s-a format
"""
self.history_obs.append(z)
if z is None:
if self.observed:
self._freeze()
self.observed = False
self.kf.update(z)
else:
if not self.observed: # Get observation, use online smoothing to re-update parameters
self._unfreeze()
self.kf.update(z)
self.observed = True
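# --- Note on the freeze/unfreeze logic above (illustrative) ---
# update(None) freezes the filter when the target is lost; once it is observed
# again, _unfreeze() replays a linearly interpolated virtual trajectory between
# the last two real observations before applying the new one, e.g.:
#
# kf = OCSORTKalman()
# kf.initialize(np.array([10., 10., 400., 1.])) # x, y, s, a
# kf.predict(); kf.update(np.array([11., 10., 400., 1.]))
# kf.predict(); kf.update(None) # occluded frame -> freeze
# kf.predict(); kf.update(np.array([14., 10., 400., 1.])) # re-observed -> unfreeze + replay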

View File

@@ -0,0 +1,73 @@
from numpy.core.multiarray import zeros as zeros
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class SORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly, since OC-SORT changes the internals of the Kalman filter
Args:
z: observation x-y-s-a format
"""
self.kf.update(z)

View File

@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np
class NSAKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z, score):
""" update step
Args:
z: observation x-y-a-h format
score: the detection score/confidence required by NSA kalman
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
# NSA
std = [(1. - score) * x for x in std]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np
class UCMCKalman(BaseKalman):
def __init__(self, ):
state_dim = 8
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160

View File

@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
def merge_matches(m1, m2, shape):
O,P,Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1*M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
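# --- Usage sketch (illustrative) ---
# lap.lapjv solves the assignment problem; `thresh` acts as a cost limit so
# pairs more expensive than it stay unmatched:
#
# cost = np.array([[0.2, 0.9],
# [0.8, 0.1]])
# matches, u_rows, u_cols = linear_assignment(cost, thresh=0.5)
# # matches -> [[0, 0], [1, 1]]; u_rows and u_cols are empty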
def ious(atlbrs, btlbrs):
"""
Compute cost based on IoU
:type atlbrs: list[tlbr] | np.ndarray
:type btlbrs: list[tlbr] | np.ndarray
:rtype ious np.ndarray
"""
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
if ious.size == 0:
return ious
ious = bbox_ious(
np.ascontiguousarray(atlbrs, dtype=float),
np.ascontiguousarray(btlbrs, dtype=float)
)
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def v_iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='cosine'):
"""
:param tracks: list[STrack]
:param detections: list[BaseTrack]
:param metric:
:return: cost_matrix np.ndarray
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
#for i, track in enumerate(tracks):
#cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # normalized features
return cost_matrix
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position, metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
return cost_matrix
def fuse_iou(cost_matrix, tracks, detections):
if cost_matrix.size == 0:
return cost_matrix
reid_sim = 1 - cost_matrix
iou_dist = iou_distance(tracks, detections)
iou_sim = 1 - iou_dist
fuse_sim = reid_sim * (1 + iou_sim) / 2
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
#fuse_sim = fuse_sim * (1 + det_scores) / 2
fuse_cost = 1 - fuse_sim
return fuse_cost
def fuse_score(cost_matrix, detections):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
fuse_sim = iou_sim * det_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
def greedy_assignment_iou(dist, thresh):
matched_indices = []
if dist.shape[1] == 0:
return np.array(matched_indices, np.int32).reshape(-1, 2)
for i in range(dist.shape[0]):
j = dist[i].argmin()
if dist[i][j] < thresh:
dist[:, j] = 1.
matched_indices.append([j, i])
return np.array(matched_indices, np.int32).reshape(-1, 2)
def greedy_assignment(dists, threshs):
matches = greedy_assignment_iou(dists.T, threshs)
u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
return matches, u_track, u_det
def fuse_score_matrix(cost_matrix, detections, tracks):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
trk_scores = np.array([trk.score for trk in tracks])
trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
mid_scores = (det_scores + trk_scores) / 2
fuse_sim = iou_sim * mid_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
"""
calculate buffered IoU, used in C_BIoU_Tracker
"""
def buffered_iou_distance(atracks, btracks, level=1):
"""
atracks: list[C_BIoUSTrack], tracks
btracks: list[C_BIoUSTrack], detections
level: cascade level, 1 or 2
"""
assert level in [1, 2], 'level must be 1 or 2'
if level == 1: # use motion_state1(tracks) and buffer_bbox1(detections) to calculate
atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
else:
atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
"""
observation centric association, with velocity, for OC Sort
"""
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):
if(len(tracklets) == 0):
return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))
# get numpy format bboxes
trk_tlbrs = np.array([track.tlbr for track in tracklets])
det_tlbrs = np.array([det.tlbr for det in detections])
det_scores = np.array([det.score for det in detections])
iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)
Y, X = speed_direction_batch(det_tlbrs, previous_obs)
inertia_Y, inertia_X = velocities[:,0], velocities[:,1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost * scores.T
matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9)
return matches, unmatched_a, unmatched_b
"""
helper func of observation_centric_association
"""
def speed_direction_batch(dets, tracks):
tracks = tracks[..., np.newaxis]
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
dx = CX2 - CX1
dy = CY2 - CY1
norm = np.sqrt(dx**2 + dy**2) + 1e-6
dx = dx / norm
dy = dy / norm
return dy, dx # size: num_track x num_det
def matching_cascade(
distance_metric, matching_thresh, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""
Run matching cascade in DeepSORT
distance_metric: function that calculates the cost matrix
matching_thresh: float, associations with cost larger than this value are disregarded
cascade_depth: int, equal to max_age of a tracklet
tracks: List[STrack], current tracks
detections: List[STrack], current detections
track_indices: List[int], tracks that will be calculated, Default None
detection_indices: List[int], detections that will be calculated, Default None
return:
matched pairs, unmatched tracks, unmatched detections: List[int], List[int], List[int]
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
detections_to_match = detection_indices
matches = []
for level in range(cascade_depth):
"""
match new track with detection firstly
"""
if not len(detections_to_match): # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
] # filter tracks whose age is equal to level + 1 (The age of Newest track = 1)
if not len(track_indices_l): # Nothing to match at this level
continue
# tracks and detections which will be matched in the current level
track_l = [tracks[idx] for idx in track_indices_l] # List[STrack]
det_l = [detections[idx] for idx in detections_to_match] # List[STrack]
# calculate the cost matrix
cost_matrix = distance_metric(track_l, det_l)
# solve the linear assignment problem
matched_row_col, unmatched_row, unmatched_col = \
linear_assignment(cost_matrix, matching_thresh)
for row, col in matched_row_col: # for those who matched
matches.append((track_indices_l[row], detections_to_match[col]))
unmatched_detection_l = [] # current detections not matched
for col in unmatched_col: # for detections not matched
unmatched_detection_l.append(detections_to_match[col])
detections_to_match = unmatched_detection_l # update detections to match for next level
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, detections_to_match
def nearest_embedding_distance(tracks, detections, metric='cosine'):
"""
different from embedding_distance, this function calculates the
nearest distance among all track history features and the detections
tracks: list[STrack]
detections: list[STrack]
metric: str, cosine or euclidean
TODO: support euclidean distance
return:
cost_matrix, np.ndarray, shape(len(tracks), len(detections))
"""
cost_matrix = np.zeros((len(tracks), len(detections)))
det_features = np.asarray([det.features[-1] for det in detections])
for row, track in enumerate(tracks):
track_history_features = np.asarray(track.features)
dist = 1. - cal_cosine_distance(track_history_features, det_features)
dist = dist.min(axis=0)
cost_matrix[row, :] = dist
return cost_matrix
def cal_cosine_distance(mat1, mat2):
"""
simple function to compute the cosine similarity between two matrices (callers convert to distance via 1 - similarity)
:param mat1: np.ndarray, shape(M, dim)
:param mat2: np.ndarray, shape(N, dim)
:return: np.ndarray, shape(M, N)
"""
# result = mat1·mat2^T / |mat1|·|mat2|
# norm mat1 and mat2
mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)
return np.dot(mat1, mat2.T)
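# --- Usage sketch (illustrative) ---
# Despite its name, cal_cosine_distance returns cosine similarity; callers such
# as nearest_embedding_distance convert it to a distance with 1 - similarity:
#
# a = np.random.rand(3, 128) # 3 track features
# b = np.random.rand(5, 128) # 5 detection features
# sim = cal_cosine_distance(a, b) # (3, 5) similarities in [-1, 1]
# dist = 1. - sim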

View File

@@ -0,0 +1,237 @@
"""
OC Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_velocity
from .matching import *
from cython_bbox import bbox_overlaps as bbox_ious
class OCSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.delta_t = 3
@staticmethod
def k_previous_obs(observations, cur_age, k):
if len(observations) == 0:
return [-1, -1, -1, -1, -1]
for i in range(k):
dt = k - i
if cur_age - dt in observations:
return observations[cur_age-dt]
max_age = max(observations.keys())
return observations[max_age]
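# --- Worked example for k_previous_obs (illustrative) ---
# With observations = {3: boxA, 5: boxB}, cur_age = 7 and k = self.delta_t = 3,
# the loop probes ages 4, 5, 6 and returns boxB (age 5); if no probed age is
# stored, it falls back to the most recent observation, and with an empty dict
# it returns the invalid box [-1, -1, -1, -1, -1].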
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, Observation Centric Momentum'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
velocities = np.array(
[trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])
# last observation, observation-centric
# last_boxes = np.array([trk.last_observation for trk in tracklet_pool])
# historical observations
k_observations = np.array(
[self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Observation centric cost matrix and assignment
matches, u_track, u_detection = observation_centric_association(
tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# association the untrack to the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
# for unmatched tracks in the first round, use the last observation
r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detections_second_bbox = [det.tlbr for det in detections_second]
dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,98 @@
"""
AFLink code in StrongSORT (StrongSORT: Make DeepSORT Great Again, arXiv)
copied from the original repo
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class TemporalBlock(nn.Module):
def __init__(self, cin, cout):
super(TemporalBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
self.relu = nn.ReLU(inplace=True)
self.bnf = nn.BatchNorm1d(cout)
self.bnx = nn.BatchNorm1d(cout)
self.bny = nn.BatchNorm1d(cout)
def bn(self, x):
x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
x[:, :, :, 2] = self.bny(x[:, :, :, 2])
return x
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class FusionBlock(nn.Module):
def __init__(self, cin, cout):
super(FusionBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
self.bn = nn.BatchNorm2d(cout)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Classifier(nn.Module):
def __init__(self, cin):
super(Classifier, self).__init__()
self.fc1 = nn.Linear(cin*2, cin//2)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Linear(cin//2, 2)
def forward(self, x1, x2):
x = torch.cat((x1, x2), dim=1)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
class PostLinker(nn.Module):
def __init__(self):
super(PostLinker, self).__init__()
self.TemporalModule_1 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.TemporalModule_2 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.FusionBlock_1 = FusionBlock(256, 256)
self.FusionBlock_2 = FusionBlock(256, 256)
self.pooling = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = Classifier(256)
def forward(self, x1, x2):
x1 = x1[:, :, :, :3]
x2 = x2[:, :, :, :3]
x1 = self.TemporalModule_1(x1) # [B,1,30,3] -> [B,256,6,3]
x2 = self.TemporalModule_2(x2)
x1 = self.FusionBlock_1(x1)
x2 = self.FusionBlock_2(x2)
x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
y = self.classifier(x1, x2)
if not self.training:
y = torch.softmax(y, dim=1)
return y
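# --- Usage sketch (illustrative) ---
# AFLink scores whether two tracklets belong to the same identity from their
# length-30 (frame, x, y) sequences; extra columns beyond 3 are sliced off in forward():
#
# model = PostLinker().eval()
# t1 = torch.rand(1, 1, 30, 3) # [B, 1, 30, (f, x, y)]
# t2 = torch.rand(1, 1, 30, 3)
# prob = model(t1, t2) # (1, 2) class probabilities after softmax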

View File

@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F
__all__ = [
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]
pretrained_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None:
num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm:
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = None
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None:
x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
if self.gate_activation is not None:
x = self.gate_activation(x)
if self.return_gates:
return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2a = LightConv3x3(mid_channels, mid_channels)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
self.IN = None
if IN:
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return F.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. TPAMI, 2021.
"""
def __init__(
self,
num_classes,
blocks,
layers,
channels,
feature_dim=512,
loss='softmax',
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
self.loss = loss
self.feature_dim = feature_dim
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3])
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# fully connected layer
self.fc = self._construct_fc_layer(
self.feature_dim, channels[3], dropout_p=None
)
# identity classification layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels),
nn.AvgPool2d(2, stride=2)
)
)
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
if fc_dims is None or fc_dims < 0:
self.feature_dim = input_dim
return None
if isinstance(fc_dims, int):
fc_dims = [fc_dims]
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def forward(self, x, return_featuremaps=False):
x = self.featuremaps(x)
if return_featuremaps:
return x
v = self.global_avgpool(x)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights from "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(cached_file)
)
else:
print(
'Successfully loaded imagenet pretrained weights from "{}"'.
format(cached_file)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# standard size (width x1.0)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x1_0')
return model
def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# medium size (width x0.75)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[48, 192, 288, 384],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_75')
return model
def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# tiny size (width x0.5)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[32, 128, 192, 256],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_5')
return model
def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# very tiny size (width x0.25)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[16, 64, 96, 128],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_25')
return model
def osnet_ibn_x1_0(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
# standard size (width x1.0) + IBN layer
# Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ibn_x1_0')
return model
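# --- Illustrative usage sketch (not part of the original file) ---
# A minimal example of extracting re-ID embeddings with one of the factory
# functions above. Batch size, crop size and the chosen variant are assumptions
# for illustration; in eval mode forward() returns the embedding vector.
if __name__ == '__main__':
    import torch
    model = osnet_x0_25(num_classes=1, pretrained=False)
    model.eval()
    with torch.no_grad():
        crops = torch.randn(8, 3, 256, 128)   # (B, C, H, W) person crops
        feats = model(crops)                  # embeddings, shape (8, 512)
    print(feats.shape)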

View File

@@ -0,0 +1,3 @@
"""
file for reid_models folder
"""

View File

@@ -0,0 +1,157 @@
"""
file for DeepSORT Re-ID model
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out, is_downsample=False):
super(BasicBlock, self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self, x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y), True)
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i == 0:
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
else:
blocks += [BasicBlock(c_out, c_out), ]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751, reid=False):
super(Net, self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3, 64, 3, stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
# nn.Conv2d(32,32,3,stride=1,padding=1),
# nn.BatchNorm2d(32),
# nn.ReLU(inplace=True),
nn.MaxPool2d(3, 2, padding=1),
)
# 32 64 32
self.layer1 = make_layers(64, 64, 2, False)
# 32 64 32
self.layer2 = make_layers(64, 128, 2, True)
# 64 32 16
self.layer3 = make_layers(128, 256, 2, True)
# 128 16 8
self.layer4 = make_layers(256, 512, 2, True)
# 256 8 4
self.avgpool = nn.AvgPool2d((8, 4), 1)
# 256 1 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
# B x 128
if self.reid:
x = x.div(x.norm(p=2, dim=1, keepdim=True))
return x
# classifier
x = self.classifier(x)
return x
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=torch.device(self.device))[
'net_dict']
self.net.load_state_dict(state_dict)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
4. to torch Tensor
5. normalize
"""
def _resize(im, size):
try:
return cv2.resize(im.astype(np.float32) / 255., size)
except Exception:
print('Error: bbox crop has zero size, shape = ', im.shape)
exit(1)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
if isinstance(im_crops, list):
im_batch = self._preprocess(im_crops)
else:
im_batch = im_crops
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features
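# --- Illustrative usage sketch (not part of the original file) ---
# How the Extractor might be called on a list of cropped detections. The
# checkpoint path 'deepsort_ckpt.t7' is a hypothetical placeholder; the crops
# are random arrays standing in for BGR image patches.
if __name__ == '__main__':
    extractor = Extractor('deepsort_ckpt.t7', use_cuda=False)   # hypothetical weight file
    crops = [np.random.randint(0, 255, (100, 50, 3), dtype=np.uint8) for _ in range(4)]
    feats = extractor(crops)    # torch.Tensor of shape (4, 512), L2-normalized rows
    print(feats.shape)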

View File

@@ -0,0 +1,273 @@
"""
load checkpoint file
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
"""
from __future__ import division, print_function, absolute_import
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn
__all__ = [
'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
'open_all_layers', 'open_specified_layers', 'count_num_param',
'load_pretrained_weights'
]
def load_checkpoint(fpath):
r"""Loads checkpoint.
``UnicodeDecodeError`` can be well handled, which means
python2-saved files can be read from python3.
Args:
fpath (str): path to checkpoint.
Returns:
dict
Examples::
>>> from torchreid.utils import load_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> checkpoint = load_checkpoint(fpath)
"""
if fpath is None:
raise ValueError('File path is None')
fpath = osp.abspath(osp.expanduser(fpath))
if not osp.exists(fpath):
raise FileNotFoundError('File is not found at "{}"'.format(fpath))
map_location = None if torch.cuda.is_available() else 'cpu'
try:
checkpoint = torch.load(fpath, map_location=map_location)
except UnicodeDecodeError:
pickle.load = partial(pickle.load, encoding="latin1")
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
checkpoint = torch.load(
fpath, pickle_module=pickle, map_location=map_location
)
except Exception:
print('Unable to load checkpoint from "{}"'.format(fpath))
raise
return checkpoint
def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
r"""Resumes training from a checkpoint.
This will load (1) model weights and (2) ``state_dict``
of optimizer if ``optimizer`` is not None.
Args:
fpath (str): path to checkpoint.
model (nn.Module): model.
optimizer (Optimizer, optional): an Optimizer.
scheduler (LRScheduler, optional): an LRScheduler.
Returns:
int: start_epoch.
Examples::
>>> from torchreid.utils import resume_from_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> start_epoch = resume_from_checkpoint(
>>> fpath, model, optimizer, scheduler
>>> )
"""
print('Loading checkpoint from "{}"'.format(fpath))
checkpoint = load_checkpoint(fpath)
model.load_state_dict(checkpoint['state_dict'])
print('Loaded model weights')
if optimizer is not None and 'optimizer' in checkpoint.keys():
optimizer.load_state_dict(checkpoint['optimizer'])
print('Loaded optimizer')
if scheduler is not None and 'scheduler' in checkpoint.keys():
scheduler.load_state_dict(checkpoint['scheduler'])
print('Loaded scheduler')
start_epoch = checkpoint['epoch']
print('Last epoch = {}'.format(start_epoch))
if 'rank1' in checkpoint.keys():
print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
return start_epoch
def adjust_learning_rate(
optimizer,
base_lr,
epoch,
stepsize=20,
gamma=0.1,
linear_decay=False,
final_lr=0,
max_epoch=100
):
r"""Adjusts learning rate.
Deprecated.
"""
if linear_decay:
# linearly decay learning rate from base_lr to final_lr
frac_done = epoch / max_epoch
lr = frac_done*final_lr + (1.-frac_done) * base_lr
else:
# decay learning rate by gamma for every stepsize
lr = base_lr * (gamma**(epoch // stepsize))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_bn_to_eval(m):
r"""Sets BatchNorm layers to eval mode."""
# 1. no update for running mean and var
# 2. scale and shift parameters are still trainable
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
m.eval()
def open_all_layers(model):
r"""Opens all layers in model for training.
Examples::
>>> from torchreid.utils import open_all_layers
>>> open_all_layers(model)
"""
model.train()
for p in model.parameters():
p.requires_grad = True
def open_specified_layers(model, open_layers):
r"""Opens specified layers in model for training while keeping
other layers frozen.
Args:
model (nn.Module): neural net model.
open_layers (str or list): layers open for training.
Examples::
>>> from torchreid.utils import open_specified_layers
>>> # Only model.classifier will be updated.
>>> open_layers = 'classifier'
>>> open_specified_layers(model, open_layers)
>>> # Only model.fc and model.classifier will be updated.
>>> open_layers = ['fc', 'classifier']
>>> open_specified_layers(model, open_layers)
"""
if isinstance(model, nn.DataParallel):
model = model.module
if isinstance(open_layers, str):
open_layers = [open_layers]
for layer in open_layers:
assert hasattr(
model, layer
), '"{}" is not an attribute of the model, please provide the correct name'.format(
layer
)
for name, module in model.named_children():
if name in open_layers:
module.train()
for p in module.parameters():
p.requires_grad = True
else:
module.eval()
for p in module.parameters():
p.requires_grad = False
def count_num_param(model):
r"""Counts number of parameters in a model while ignoring ``self.classifier``.
Args:
model (nn.Module): network model.
Examples::
>>> from torchreid.utils import count_num_param
>>> model_size = count_num_param(model)
.. warning::
This method is deprecated in favor of
``torchreid.utils.compute_model_complexity``.
"""
warnings.warn(
'This method is deprecated and will be removed in the future.'
)
num_param = sum(p.numel() for p in model.parameters())
if isinstance(model, nn.DataParallel):
model = model.module
if hasattr(model,
'classifier') and isinstance(model.classifier, nn.Module):
# we ignore the classifier because it is unused at test time
num_param -= sum(p.numel() for p in model.classifier.parameters())
return num_param
def load_pretrained_weights(model, weight_path):
r"""Loads pretrianed weights to model.
Features::
- Incompatible layers (unmatched in name or size) will be ignored.
- Can automatically deal with keys containing "module.".
Args:
model (nn.Module): network model.
weight_path (str): path to pretrained weights.
Examples::
>>> from torchreid.utils import load_pretrained_weights
>>> weight_path = 'log/my_model/model-best.pth.tar'
>>> load_pretrained_weights(model, weight_path)
"""
checkpoint = load_checkpoint(weight_path)
if 'state_dict' in checkpoint:
state_dict = checkpoint['state_dict']
else:
state_dict = checkpoint
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(weight_path)
)
else:
print(
'Successfully loaded pretrained weights from "{}"'.
format(weight_path)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)

View File

@@ -0,0 +1,169 @@
"""
Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class SortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 3: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 4: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
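# --- Illustrative usage sketch (not part of the original file) ---
# Driving the tracker frame by frame. The namespace mirrors the attributes this
# class reads (conf_thresh, track_buffer, kalman_format); the detection layout
# (x1, y1, x2, y2, score, class) follows how update() slices output_results.
# All values below are made up for illustration.
if __name__ == '__main__':
    from types import SimpleNamespace
    args = SimpleNamespace(conf_thresh=0.5, track_buffer=30, kalman_format='sort')
    tracker = SortTracker(args, frame_rate=30)
    dummy_img = np.zeros((720, 1280, 3), dtype=np.uint8)
    dets = np.array([[100., 100., 150., 200., 0.9, 0.],
                     [300., 120., 360., 240., 0.8, 0.]])
    for _ in range(3):   # pretend the same detections arrive for 3 frames
        online = tracker.update(dets, dummy_img, dummy_img)
        print([(t.track_id, t.score) for t in online])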

View File

@@ -0,0 +1,338 @@
"""
Bot sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_depth
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class SparseTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
def get_deep_range(self, obj, step):
col = []
for t in obj:
lend = (t.deep_vec)[2]
col.append(lend)
max_len, mix_len = max(col), min(col)
if max_len != mix_len:
deep_range =np.arange(mix_len, max_len, (max_len - mix_len + 1) / step)
if deep_range[-1] < max_len:
deep_range = np.concatenate([deep_range, np.array([max_len],)])
deep_range[0] = np.floor(deep_range[0])
deep_range[-1] = np.ceil(deep_range[-1])
else:
deep_range = [mix_len,]
mask = self.get_sub_mask(deep_range, col)
return mask
def get_sub_mask(self, deep_range, col):
mix_len=deep_range[0]
max_len=deep_range[-1]
if max_len == mix_len:
lc = mix_len
mask = []
for d in deep_range:
if d > deep_range[0] and d < deep_range[-1]:
mask.append((col >= lc) & (col < d))
lc = d
elif d == deep_range[-1]:
mask.append((col >= lc) & (col <= d))
lc = d
else:
lc = d
continue
return mask
# core function
def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
if len(detections) > 0:
det_mask = self.get_deep_range(detections, levels)
else:
det_mask = []
if len(tracks)!=0:
track_mask = self.get_deep_range(tracks, levels)
else:
track_mask = []
u_detection, u_tracks, res_det, res_track = [], [], [], []
if len(track_mask) != 0:
if len(track_mask) < len(det_mask):
for i in range(len(det_mask) - len(track_mask)):
idx = np.argwhere(det_mask[len(track_mask) + i] == True)
for idd in idx:
res_det.append(detections[idd[0]])
elif len(track_mask) > len(det_mask):
for i in range(len(track_mask) - len(det_mask)):
idx = np.argwhere(track_mask[len(det_mask) + i] == True)
for idd in idx:
res_track.append(tracks[idd[0]])
for dm, tm in zip(det_mask, track_mask):
det_idx = np.argwhere(dm == True)
trk_idx = np.argwhere(tm == True)
# search det
det_ = []
for idd in det_idx:
det_.append(detections[idd[0]])
det_ = det_ + u_detection
# search trk
track_ = []
for idt in trk_idx:
track_.append(tracks[idt[0]])
# update trk
track_ = track_ + u_tracks
dists = iou_distance(track_, det_)
matches, u_track_, u_det_ = linear_assignment(dists, thresh)
for itracked, idet in matches:
track = track_[itracked]
det = det_[idet]
if track.state == TrackState.Tracked:
track.update(det_[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
u_tracks = [track_[t] for t in u_track_]
u_detection = [det_[t] for t in u_det_]
u_tracks = u_tracks + res_track
u_detection = u_detection + res_det
else:
u_detection = detections
return activated_tracklets, refind_tracklets, u_tracks, u_detection
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Step 1: Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes, depth cascade matching'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
# depth cascade matching
activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
detections,
tracklet_pool,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.75,
is_fuse=True)
''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]
activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
detections_second,
r_tracked_tracklets,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.3,
is_fuse=False)
for track in u_track:
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = u_detection_high
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
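# --- Illustrative sketch of the pseudo-depth binning behind DCM (not part of the original file) ---
# get_deep_range() splits tracks/detections into `levels` bins by pseudo-depth
# (deep_vec[2] = 2000 - y2) and DCM then associates bin by bin, passing the
# unmatched ones down to the next bin. The standalone code below reproduces
# only the binning step on made-up depth values.
if __name__ == '__main__':
    depths = np.array([150., 400., 420., 900., 1300.])   # assumed pseudo-depths
    levels = 3
    lo, hi = depths.min(), depths.max()
    edges = np.arange(lo, hi, (hi - lo + 1) / levels)
    if edges[-1] < hi:
        edges = np.concatenate([edges, np.array([hi])])
    edges[0], edges[-1] = np.floor(edges[0]), np.ceil(edges[-1])
    lc = edges[0]
    for i, d in enumerate(edges[1:]):
        in_bin = (depths >= lc) & ((depths <= d) if d == edges[-1] else (depths < d))
        print('bin', i, '->', depths[in_bin])
        lc = d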

View File

@@ -0,0 +1,327 @@
"""
Deep Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class StrongSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (h, w, c) -> (c, h, w), float in [0, 1]
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
self.lambda_ = 0.98 # coefficient mixing appearance cost and Kalman gating distance (eq. 10 in the paper)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, h, w), where (w, h) = self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to original size), in tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# vanilla matching (appearance cost gated by Kalman state)
cost_matrix = self.gated_metric(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get cost matrix: first compute the appearance cost, then gate it by the Kalman state.
tracks: List[STrack]
dets: List[STrack]
"""
apperance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(apperance_dist, tracks, dets, )
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by the Kalman state distance, constrained by the
0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as state vector or only use [xc, yc]
return:
updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb
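# --- Illustrative sketch of the gating/mixing in gate_cost_matrix (not part of the original file) ---
# Appearance costs above max_apperance_thresh, and pairs whose Kalman gating
# distance exceeds the chi-square threshold, are set to a large constant; the
# remaining entries are blended as lambda * appearance + (1 - lambda) * gating
# distance. All numbers below are made up.
if __name__ == '__main__':
    appearance = np.array([[0.05, 0.30],
                           [0.10, 0.08]])     # cosine distances, tracks x detections
    gating = np.array([[3.0, 50.0],
                       [40.0, 5.0]])          # squared Mahalanobis distances
    lam, gated_cost, chi2_thresh = 0.98, 1e5, 9.4877   # chi2inv95 for 4 dof
    cost = appearance.copy()
    cost[cost > 0.15] = gated_cost            # appearance gate
    cost[gating > chi2_thresh] = gated_cost   # motion gate
    cost = lam * cost + (1 - lam) * gating    # blended association cost
    print(cost)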

View File

@@ -0,0 +1,366 @@
"""
implements base elements of trajectory
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman
MOTION_MODEL_DICT = {
'sort': SORTKalman,
'byte': ByteKalman,
'bot': BotKalman,
'ocsort': OCSORTKalman,
'strongsort': NSAKalman,
}
STATE_CONVERT_DICT = {
'sort': 'xysa',
'byte': 'xyah',
'bot': 'xywh',
'ocsort': 'xysa',
'strongsort': 'xyah'
}
class Tracklet(BaseTrack):
def __init__(self, tlwh, score, category, motion='byte'):
# initial position
self._tlwh = np.asarray(tlwh, dtype=float)
self.is_activated = False
self.score = score
self.category = category
# kalman
self.motion = motion
self.kalman_filter = MOTION_MODEL_DICT[motion]()
self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])
# init kalman
self.kalman_filter.initialize(self.convert_func(self._tlwh))
def predict(self):
self.kalman_filter.predict()
self.time_since_update += 1
def activate(self, frame_id):
self.track_id = self.next_id()
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()
def xyah_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def xywh_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
def xysa_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] = np.sqrt(x[2] * x[3])
ret[3] = x[2] / ret[2]
ret[:2] -= ret[2:] / 2
return ret
class Tracklet_w_reid(Tracklet):
"""
Tracklet class with reid features, for botsort, deepsort, etc.
"""
def __init__(self, tlwh, score, category, motion='byte',
feat=None, feat_history=50):
super().__init__(tlwh, score, category, motion)
self.smooth_feat = None # EMA feature
self.curr_feat = None # current feature
self.features = deque([], maxlen=feat_history) # all features
if feat is not None:
self.update_features(feat)
self.alpha = 0.9
def update_features(self, feat):
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
else:
self.kalman_filter.update(self.convert_func(new_track.tlwh))
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
else:
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.time_since_update = 0
class Tracklet_w_velocity(Tracklet):
"""
Tracklet class with reid features, for ocsort.
"""
def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
super().__init__(tlwh, score, category, motion)
self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
self.observations = dict()
self.history_observations = []
self.velocity = None
self.delta_t = delta_t
self.age = 0 # mark the age
@staticmethod
def speed_direction(bbox1, bbox2):
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
speed = np.array([cy2 - cy1, cx2 - cx1])
norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
return speed / norm
def predict(self):
self.kalman_filter.predict()
self.age += 1
self.time_since_update += 1
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
# update velocity and history buffer
new_tlbr = Tracklet_w_bbox_buffer.tlwh_to_tlbr(new_tlwh)
if self.last_observation.sum() >= 0: # a previous observation exists
previous_box = None
for i in range(self.delta_t):
dt = self.delta_t - i
if self.age - dt in self.observations:
previous_box = self.observations[self.age-dt]
break
if previous_box is None:
previous_box = self.last_observation
"""
Estimate the track speed direction with observations \Delta t steps away
"""
self.velocity = self.speed_direction(previous_box, new_tlbr)
new_observation = np.r_[new_tlbr, new_track.score]
self.last_observation = new_observation
self.observations[self.age] = new_observation
self.history_observations.append(new_observation)
class Tracklet_w_bbox_buffer(Tracklet):
"""
Tracklet class with buffer of bbox, for C_BIoU track.
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
# params in motion state
self.b1, self.b2, self.n = 0.3, 0.5, 5
self.origin_bbox_buffer = deque() # a deque storing the original bboxes (tlwh) from t - self.n to t, where t is the last detected frame
self.origin_bbox_buffer.append(self._tlwh)
# buffered bbox, two buffer sizes
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
# motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def get_buffer_bbox(self, level=1, bbox=None):
"""
get buffered bbox as: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
level = 1: b = self.b1 level = 2: b = self.b2
bbox: if not None, use bbox to calculate buffer_bbox, else use self._tlwh
"""
assert level in [1, 2], 'level must be 1 or 2'
b = self.b1 if level == 1 else self.b2
if bbox is None:
buffer_bbox = self._tlwh + np.array([-b*self._tlwh[2], -b*self._tlwh[3], 2*b*self._tlwh[2], 2*b*self._tlwh[3]])
else:
buffer_bbox = bbox + np.array([-b*bbox[2], -b*bbox[3], 2*b*bbox[2], 2*b*bbox[3]])
return np.maximum(0.0, buffer_bbox)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self._tlwh = new_track._tlwh
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(self._tlwh)
else:
self.origin_bbox_buffer.append(self._tlwh)
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
unmatched_frames = self.time_since_update  # frames since the last matched detection
self.time_since_update = 0
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(new_tlwh)
else:
self.origin_bbox_buffer.append(new_tlwh)
# update motion state
if unmatched_frames: # have some unmatched frames
if len(self.origin_bbox_buffer) < self.n:
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
else: # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
motion_state = self.origin_bbox_buffer[-1] + \
(unmatched_frames / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)
else: # no unmatched frames, use current detection as motion state
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
class Tracklet_w_depth(Tracklet):
"""
tracklet with depth info (i.e., 2000 - y2), for SparseTrack
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
@property
# @jit(nopython=True)
def deep_vec(self):
"""Convert bounding box to format `((top left, bottom right)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
cx = ret[0] + 0.5 * ret[2]
y2 = ret[1] + ret[3]
length = 2000 - y2
return np.asarray([cx, y2, length], dtype=float)
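# --- Illustrative sketch of the C-BIoU buffered box (not part of the original file) ---
# Tracklet_w_bbox_buffer.get_buffer_bbox() grows an (x, y, w, h) box by a
# fraction b of its own size on every side: (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h),
# clipped at 0. The numbers below are made up to show the two buffer levels.
if __name__ == '__main__':
    def _buffered(tlwh, b):
        tlwh = np.asarray(tlwh, dtype=float)
        grown = tlwh + np.array([-b * tlwh[2], -b * tlwh[3], 2 * b * tlwh[2], 2 * b * tlwh[3]])
        return np.maximum(0.0, grown)
    box = [100., 50., 40., 80.]
    print(_buffered(box, 0.3))   # [ 88.  26.  64. 128.]
    print(_buffered(box, 0.5))   # [ 80.  10.  80. 160.]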

View File

@@ -0,0 +1,5 @@
from .eval import Evaluator
from . import datasets
from . import metrics
from . import plotting
from . import utils

View File

@@ -0,0 +1,65 @@
from functools import wraps
from time import perf_counter
import inspect
DO_TIMING = False
DISPLAY_LESS_PROGRESS = False
timer_dict = {}
counter = 0
def time(f):
@wraps(f)
def wrap(*args, **kw):
if DO_TIMING:
# Run function with timing
ts = perf_counter()
result = f(*args, **kw)
te = perf_counter()
tt = te-ts
# Get function name
arg_names = inspect.getfullargspec(f)[0]
if arg_names[0] == 'self' and DISPLAY_LESS_PROGRESS:
return result
elif arg_names[0] == 'self':
method_name = type(args[0]).__name__ + '.' + f.__name__
else:
method_name = f.__name__
# Record accumulative time in each function for analysis
if method_name in timer_dict.keys():
timer_dict[method_name] += tt
else:
timer_dict[method_name] = tt
# If code is finished, display timing summary
if method_name == "Evaluator.evaluate":
print("")
print("Timing analysis:")
for key, value in timer_dict.items():
print('%-70s %2.4f sec' % (key, value))
else:
# Get function argument values for printing special arguments of interest
arg_titles = ['tracker', 'seq', 'cls']
arg_vals = []
for i, a in enumerate(arg_names):
if a in arg_titles:
arg_vals.append(args[i])
arg_text = '(' + ', '.join(arg_vals) + ')'
# Display methods and functions with different indentation.
if arg_names[0] == 'self':
print('%-74s %2.4f sec' % (' '*4 + method_name + arg_text, tt))
elif arg_names[0] == 'test':
pass
else:
global counter
counter += 1
print('%i %-70s %2.4f sec' % (counter, method_name + arg_text, tt))
return result
else:
# If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
return f(*args, **kw)
return wrap
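# --- Illustrative usage sketch (not part of the original file) ---
# DO_TIMING is False by default, so decorated calls run untimed; flipping the
# module-level flag makes the wrapper print per-call durations. The decorated
# function below is a made-up example, not part of TrackEval.
if __name__ == '__main__':
    DO_TIMING = True                     # rebind the module-level switch

    @time
    def count_up_to(n):
        total = 0
        for _ in range(n):
            total += 1
        return total

    print(count_up_to(1_000_000))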

View File

@@ -0,0 +1,6 @@
import baseline_utils
import stp
import non_overlap
import pascal_colormap
import thresholder
import vizualize

View File

@@ -0,0 +1,321 @@
import os
import csv
import numpy as np
from copy import deepcopy
from PIL import Image
from pycocotools import mask as mask_utils
from scipy.optimize import linear_sum_assignment
from trackeval.baselines.pascal_colormap import pascal_colormap
def load_seq(file_to_load):
""" Load input data from file in RobMOTS format (e.g. provided detections).
Returns: Data object with the following structure (see STP):
data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
"""
fp = open(file_to_load)
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
dialect.skipinitialspace = True
fp.seek(0)
reader = csv.reader(fp, dialect)
read_data = {}
num_timesteps = 0
for i, row in enumerate(reader):
if row[-1] == '':  # drop a trailing empty field
row = row[:-1]
t = int(row[0])
cid = row[1]
c = int(row[2])
s = row[3]
h = row[4]
w = row[5]
rle = row[6]
if t >= num_timesteps:
num_timesteps = t + 1
if c in read_data.keys():
if t in read_data[c].keys():
read_data[c][t]['ids'].append(cid)
read_data[c][t]['scores'].append(s)
read_data[c][t]['im_hs'].append(h)
read_data[c][t]['im_ws'].append(w)
read_data[c][t]['mask_rles'].append(rle)
else:
read_data[c][t] = {}
read_data[c][t]['ids'] = [cid]
read_data[c][t]['scores'] = [s]
read_data[c][t]['im_hs'] = [h]
read_data[c][t]['im_ws'] = [w]
read_data[c][t]['mask_rles'] = [rle]
else:
read_data[c] = {t: {}}
read_data[c][t]['ids'] = [cid]
read_data[c][t]['scores'] = [s]
read_data[c][t]['im_hs'] = [h]
read_data[c][t]['im_ws'] = [w]
read_data[c][t]['mask_rles'] = [rle]
fp.close()
data = {}
for c in read_data.keys():
data[c] = [{} for _ in range(num_timesteps)]
for t in range(num_timesteps):
if t in read_data[c].keys():
data[c][t]['ids'] = np.atleast_1d(read_data[c][t]['ids']).astype(int)
data[c][t]['scores'] = np.atleast_1d(read_data[c][t]['scores']).astype(float)
data[c][t]['im_hs'] = np.atleast_1d(read_data[c][t]['im_hs']).astype(int)
data[c][t]['im_ws'] = np.atleast_1d(read_data[c][t]['im_ws']).astype(int)
data[c][t]['mask_rles'] = np.atleast_1d(read_data[c][t]['mask_rles']).astype(str)
else:
data[c][t]['ids'] = np.empty(0).astype(int)
data[c][t]['scores'] = np.empty(0).astype(float)
data[c][t]['im_hs'] = np.empty(0).astype(int)
data[c][t]['im_ws'] = np.empty(0).astype(int)
data[c][t]['mask_rles'] = np.empty(0).astype(str)
return data
def threshold(tdata, thresh):
""" Removes detections below a certian threshold ('thresh') score. """
new_data = {}
to_keep = tdata['scores'] > thresh
for field in ['ids', 'scores', 'im_hs', 'im_ws', 'mask_rles']:
new_data[field] = tdata[field][to_keep]
return new_data
def create_coco_mask(mask_rles, im_hs, im_ws):
""" Converts mask as rle text (+ height and width) to encoded version used by pycocotools. """
coco_masks = [{'size': [h, w], 'counts': m.encode(encoding='UTF-8')}
for h, w, m in zip(im_hs, im_ws, mask_rles)]
return coco_masks
def mask_iou(mask_rles1, mask_rles2, im_hs, im_ws, do_ioa=0):
""" Calculate mask IoU between two masks.
Further allows 'intersection over area' instead of IoU (over the area of mask_rle1).
Allows either to pass in 1 boolean for do_ioa for all mask_rles2 or also one for each mask_rles2.
It is recommended that mask_rles1 is a detection and mask_rles2 is a groundtruth.
"""
coco_masks1 = create_coco_mask(mask_rles1, im_hs, im_ws)
coco_masks2 = create_coco_mask(mask_rles2, im_hs, im_ws)
if not hasattr(do_ioa, "__len__"):
do_ioa = [do_ioa]*len(coco_masks2)
assert(len(coco_masks2) == len(do_ioa))
if len(coco_masks1) == 0 or len(coco_masks2) == 0:
iou = np.zeros((len(coco_masks1), len(coco_masks2)))
else:
iou = mask_utils.iou(coco_masks1, coco_masks2, do_ioa)
return iou
def sort_by_score(t_data):
""" Sorts data by score """
sort_index = np.argsort(t_data['scores'])[::-1]
for k in t_data.keys():
t_data[k] = t_data[k][sort_index]
return t_data
def mask_NMS(t_data, nms_threshold=0.5, already_sorted=False):
""" Remove redundant masks by performing non-maximum suppression (NMS) """
# Sort by score
if not already_sorted:
t_data = sort_by_score(t_data)
# Calculate the mask IoU between all detections in the timestep.
mask_ious_all = mask_iou(t_data['mask_rles'], t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Determine which masks NMS should remove
# (those overlapping greater than nms_threshold with another mask that has a higher score)
num_dets = len(t_data['mask_rles'])
to_remove = [False for _ in range(num_dets)]
for i in range(num_dets):
if not to_remove[i]:
for j in range(i + 1, num_dets):
if mask_ious_all[i, j] > nms_threshold:
to_remove[j] = True
# Remove detections which should be removed
to_keep = np.logical_not(to_remove)
for k in t_data.keys():
t_data[k] = t_data[k][to_keep]
return t_data
def non_overlap(t_data, already_sorted=False):
""" Enforces masks to be non-overlapping in an image, does this by putting masks 'on top of one another',
such that higher score masks 'occlude' and thus remove parts of lower scoring masks.
Help wanted: if anyone knows a way to do this WITHOUT converting the RLE to the np.array let me know, because that
would be MUCH more efficient. (I have tried, but haven't yet had success).
"""
# Sort by score
if not already_sorted:
t_data = sort_by_score(t_data)
# Get coco masks
coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Create a single np.array to hold all of the non-overlapping mask
masks_array = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]), 'uint8')
# Decode each mask into a np.array, and place it into the overall array for the whole frame.
# Since masks with the lowest score are placed first, they are 'partially overridden' by masks with a higher score
# if they overlap.
for i, mask in enumerate(coco_masks[::-1]):
masks_array[mask_utils.decode(mask).astype('bool')] = i + 1
# Encode the resulting np.array back into a set of coco_masks which are now non-overlapping.
num_dets = len(coco_masks)
for i, j in enumerate(range(1, num_dets + 1)[::-1]):
coco_masks[i] = mask_utils.encode(np.asfortranarray(masks_array == j, dtype=np.uint8))
# Convert from coco_mask back into our mask_rle format.
t_data['mask_rles'] = [m['counts'].decode("utf-8") for m in coco_masks]
return t_data
def masks2boxes(mask_rles, im_hs, im_ws):
""" Extracts bounding boxes which surround a set of masks. """
coco_masks = create_coco_mask(mask_rles, im_hs, im_ws)
boxes = np.array([mask_utils.toBbox(x) for x in coco_masks])
if len(boxes) == 0:
boxes = np.empty((0, 4))
return boxes
def box_iou(bboxes1, bboxes2, box_format='xywh', do_ioa=False, do_giou=False):
""" Calculates the IOU (intersection over union) between two arrays of boxes.
Allows variable box formats ('xywh' and 'x0y0x1y1').
If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
used to determine if detections are within crowd ignore region.
If do_giou (generalized intersection over union), then calculates GIoU.
"""
if len(bboxes1) == 0 or len(bboxes2) == 0:
ious = np.zeros((len(bboxes1), len(bboxes2)))
return ious
if box_format == 'xywh':
# layout: (x0, y0, w, h)
bboxes1 = deepcopy(bboxes1)
bboxes2 = deepcopy(bboxes2)
bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
elif box_format != 'x0y0x1y1':
raise (Exception('box_format %s is not implemented' % box_format))
# layout: (x0, y0, x1, y1)
min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
if do_ioa:
ioas = np.zeros_like(intersection)
valid_mask = area1 > 0 + np.finfo('float').eps
ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
return ioas
else:
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
intersection[union <= 0 + np.finfo('float').eps] = 0
union[union <= 0 + np.finfo('float').eps] = 1
ious = intersection / union
if do_giou:
enclosing_area = np.maximum(max_[..., 2] - min_[..., 0], 0) * np.maximum(max_[..., 3] - min_[..., 1], 0)
eps = 1e-7
# giou
ious = ious - ((enclosing_area - union) / (enclosing_area + eps))
return ious
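A quick numeric sketch (editorial addition, not part of the original file) of the conventions above, again assuming the module imports as trackeval.baselines.baseline_utils; the boxes are made up:

import numpy as np
from trackeval.baselines import baseline_utils as butils

boxes_a = np.array([[0., 0., 10., 10.]])                         # one 10x10 box at the origin ('xywh')
boxes_b = np.array([[0., 0., 10., 10.], [20., 20., 10., 10.]])   # an identical box and a disjoint one
print(butils.box_iou(boxes_a, boxes_b))                 # [[1. 0.]]
print(butils.box_iou(boxes_a, boxes_b, do_ioa=True))    # [[1. 0.]]  intersection over the area of boxes_a
print(butils.box_iou(boxes_a, boxes_b, do_giou=True))   # approx [[1. -0.78]]  GIoU penalises the empty enclosing area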
def match(match_scores):
match_rows, match_cols = linear_sum_assignment(-match_scores)
return match_rows, match_cols
def write_seq(output_data, out_file):
out_loc = os.path.dirname(out_file)
if not os.path.exists(out_loc):
os.makedirs(out_loc, exist_ok=True)
fp = open(out_file, 'w', newline='')
writer = csv.writer(fp, delimiter=' ')
for row in output_data:
writer.writerow(row)
fp.close()
def combine_classes(data):
""" Converts data from a class-separated to a class-combined format.
Input format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
Output format: data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
"""
output_data = [{} for _ in list(data.values())[0]]
for cls, cls_data in data.items():
for timestep, t_data in enumerate(cls_data):
for k in t_data.keys():
if k in output_data[timestep].keys():
output_data[timestep][k] += list(t_data[k])
else:
output_data[timestep][k] = list(t_data[k])
if 'cls' in output_data[timestep].keys():
output_data[timestep]['cls'] += [cls]*len(t_data['ids'])
else:
output_data[timestep]['cls'] = [cls]*len(t_data['ids'])
for timestep, t_data in enumerate(output_data):
for k in t_data.keys():
output_data[timestep][k] = np.array(output_data[timestep][k])
return output_data
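A toy illustration (editorial addition, not part of the original file) of the conversion above; the RLE strings are placeholders and only the list bookkeeping matters here:

import numpy as np
from trackeval.baselines import baseline_utils as butils

data = {1: [{'ids': np.array([0]), 'scores': np.array([0.9]), 'im_hs': np.array([480]), 'im_ws': np.array([640]), 'mask_rles': ['rle_a']}],
        2: [{'ids': np.array([1]), 'scores': np.array([0.8]), 'im_hs': np.array([480]), 'im_ws': np.array([640]), 'mask_rles': ['rle_b']}]}
combined = butils.combine_classes(data)
# combined[0]['ids'] -> array([0, 1]); combined[0]['cls'] pairs each of these detections with its class id.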
def save_as_png(t_data, out_file, im_h, im_w):
""" Save a set of segmentation masks into a PNG format, the same as used for the DAVIS dataset."""
if len(t_data['mask_rles']) > 0:
coco_masks = create_coco_mask(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
list_of_np_masks = [mask_utils.decode(mask) for mask in coco_masks]
png = np.zeros((t_data['im_hs'][0], t_data['im_ws'][0]))
for mask, c_id in zip(list_of_np_masks, t_data['ids']):
png[mask.astype("bool")] = c_id + 1
else:
png = np.zeros((im_h, im_w))
if not os.path.exists(os.path.dirname(out_file)):
os.makedirs(os.path.dirname(out_file))
colmap = (np.array(pascal_colormap) * 255).round().astype("uint8")
palimage = Image.new('P', (16, 16))
palimage.putpalette(colmap)
im = Image.fromarray(np.squeeze(png.astype("uint8")))
im2 = im.quantize(palette=palimage)
im2.save(out_file)
def get_frame_size(data):
""" Gets frame height and width from data. """
for cls, cls_data in data.items():
for timestep, t_data in enumerate(cls_data):
if len(t_data['im_hs']) > 0:
im_h = t_data['im_hs'][0]
im_w = t_data['im_ws'][0]
return im_h, im_w
return None

View File

@@ -0,0 +1,92 @@
"""
Non-Overlap: Code to take in a set of raw detections and produce a set of non-overlapping detections from it.
Author: Jonathon Luiten
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/raw_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'THRESHOLD_NMS_MASK_IOU': 0.5,
}
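The script is driven entirely by this module-level config dict; a hand edit for a smaller, parallel run might look like the following (the values here are only an illustration):

config['SPLIT'] = 'val'                  # process the val split instead of train
config['Benchmarks'] = ['kitti_mots']    # restrict the run to a single benchmark
config['Num_Parallel_Cores'] = 8         # use a multiprocessing Pool with 8 workers
config['THRESHOLD_NMS_MASK_IOU'] = 0.7   # keep more overlapping masks before the non-overlap step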
def do_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Converts data from a class-separated to a class-combined format.
# data[t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles', 'cls'}
data = butils.combine_classes(data)
# Where to accumulate output data for writing out
output_data = []
# Run for each timestep.
for timestep, t_data in enumerate(data):
# Remove redundant masks by performing non-maximum suppression (NMS)
t_data = butils.mask_NMS(t_data, nms_threshold=config['THRESHOLD_NMS_MASK_IOU'])
# Perform non-overlap, to get non_overlapping masks.
t_data = butils.non_overlap(t_data, already_sorted=True)
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(t_data['ids'][i]), t_data['cls'][i], t_data['scores'][i], t_data['im_hs'][i],
t_data['im_ws'][i], t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,257 @@
pascal_colormap = [
0 , 0, 0,
0.5020, 0, 0,
0, 0.5020, 0,
0.5020, 0.5020, 0,
0, 0, 0.5020,
0.5020, 0, 0.5020,
0, 0.5020, 0.5020,
0.5020, 0.5020, 0.5020,
0.2510, 0, 0,
0.7529, 0, 0,
0.2510, 0.5020, 0,
0.7529, 0.5020, 0,
0.2510, 0, 0.5020,
0.7529, 0, 0.5020,
0.2510, 0.5020, 0.5020,
0.7529, 0.5020, 0.5020,
0, 0.2510, 0,
0.5020, 0.2510, 0,
0, 0.7529, 0,
0.5020, 0.7529, 0,
0, 0.2510, 0.5020,
0.5020, 0.2510, 0.5020,
0, 0.7529, 0.5020,
0.5020, 0.7529, 0.5020,
0.2510, 0.2510, 0,
0.7529, 0.2510, 0,
0.2510, 0.7529, 0,
0.7529, 0.7529, 0,
0.2510, 0.2510, 0.5020,
0.7529, 0.2510, 0.5020,
0.2510, 0.7529, 0.5020,
0.7529, 0.7529, 0.5020,
0, 0, 0.2510,
0.5020, 0, 0.2510,
0, 0.5020, 0.2510,
0.5020, 0.5020, 0.2510,
0, 0, 0.7529,
0.5020, 0, 0.7529,
0, 0.5020, 0.7529,
0.5020, 0.5020, 0.7529,
0.2510, 0, 0.2510,
0.7529, 0, 0.2510,
0.2510, 0.5020, 0.2510,
0.7529, 0.5020, 0.2510,
0.2510, 0, 0.7529,
0.7529, 0, 0.7529,
0.2510, 0.5020, 0.7529,
0.7529, 0.5020, 0.7529,
0, 0.2510, 0.2510,
0.5020, 0.2510, 0.2510,
0, 0.7529, 0.2510,
0.5020, 0.7529, 0.2510,
0, 0.2510, 0.7529,
0.5020, 0.2510, 0.7529,
0, 0.7529, 0.7529,
0.5020, 0.7529, 0.7529,
0.2510, 0.2510, 0.2510,
0.7529, 0.2510, 0.2510,
0.2510, 0.7529, 0.2510,
0.7529, 0.7529, 0.2510,
0.2510, 0.2510, 0.7529,
0.7529, 0.2510, 0.7529,
0.2510, 0.7529, 0.7529,
0.7529, 0.7529, 0.7529,
0.1255, 0, 0,
0.6275, 0, 0,
0.1255, 0.5020, 0,
0.6275, 0.5020, 0,
0.1255, 0, 0.5020,
0.6275, 0, 0.5020,
0.1255, 0.5020, 0.5020,
0.6275, 0.5020, 0.5020,
0.3765, 0, 0,
0.8784, 0, 0,
0.3765, 0.5020, 0,
0.8784, 0.5020, 0,
0.3765, 0, 0.5020,
0.8784, 0, 0.5020,
0.3765, 0.5020, 0.5020,
0.8784, 0.5020, 0.5020,
0.1255, 0.2510, 0,
0.6275, 0.2510, 0,
0.1255, 0.7529, 0,
0.6275, 0.7529, 0,
0.1255, 0.2510, 0.5020,
0.6275, 0.2510, 0.5020,
0.1255, 0.7529, 0.5020,
0.6275, 0.7529, 0.5020,
0.3765, 0.2510, 0,
0.8784, 0.2510, 0,
0.3765, 0.7529, 0,
0.8784, 0.7529, 0,
0.3765, 0.2510, 0.5020,
0.8784, 0.2510, 0.5020,
0.3765, 0.7529, 0.5020,
0.8784, 0.7529, 0.5020,
0.1255, 0, 0.2510,
0.6275, 0, 0.2510,
0.1255, 0.5020, 0.2510,
0.6275, 0.5020, 0.2510,
0.1255, 0, 0.7529,
0.6275, 0, 0.7529,
0.1255, 0.5020, 0.7529,
0.6275, 0.5020, 0.7529,
0.3765, 0, 0.2510,
0.8784, 0, 0.2510,
0.3765, 0.5020, 0.2510,
0.8784, 0.5020, 0.2510,
0.3765, 0, 0.7529,
0.8784, 0, 0.7529,
0.3765, 0.5020, 0.7529,
0.8784, 0.5020, 0.7529,
0.1255, 0.2510, 0.2510,
0.6275, 0.2510, 0.2510,
0.1255, 0.7529, 0.2510,
0.6275, 0.7529, 0.2510,
0.1255, 0.2510, 0.7529,
0.6275, 0.2510, 0.7529,
0.1255, 0.7529, 0.7529,
0.6275, 0.7529, 0.7529,
0.3765, 0.2510, 0.2510,
0.8784, 0.2510, 0.2510,
0.3765, 0.7529, 0.2510,
0.8784, 0.7529, 0.2510,
0.3765, 0.2510, 0.7529,
0.8784, 0.2510, 0.7529,
0.3765, 0.7529, 0.7529,
0.8784, 0.7529, 0.7529,
0, 0.1255, 0,
0.5020, 0.1255, 0,
0, 0.6275, 0,
0.5020, 0.6275, 0,
0, 0.1255, 0.5020,
0.5020, 0.1255, 0.5020,
0, 0.6275, 0.5020,
0.5020, 0.6275, 0.5020,
0.2510, 0.1255, 0,
0.7529, 0.1255, 0,
0.2510, 0.6275, 0,
0.7529, 0.6275, 0,
0.2510, 0.1255, 0.5020,
0.7529, 0.1255, 0.5020,
0.2510, 0.6275, 0.5020,
0.7529, 0.6275, 0.5020,
0, 0.3765, 0,
0.5020, 0.3765, 0,
0, 0.8784, 0,
0.5020, 0.8784, 0,
0, 0.3765, 0.5020,
0.5020, 0.3765, 0.5020,
0, 0.8784, 0.5020,
0.5020, 0.8784, 0.5020,
0.2510, 0.3765, 0,
0.7529, 0.3765, 0,
0.2510, 0.8784, 0,
0.7529, 0.8784, 0,
0.2510, 0.3765, 0.5020,
0.7529, 0.3765, 0.5020,
0.2510, 0.8784, 0.5020,
0.7529, 0.8784, 0.5020,
0, 0.1255, 0.2510,
0.5020, 0.1255, 0.2510,
0, 0.6275, 0.2510,
0.5020, 0.6275, 0.2510,
0, 0.1255, 0.7529,
0.5020, 0.1255, 0.7529,
0, 0.6275, 0.7529,
0.5020, 0.6275, 0.7529,
0.2510, 0.1255, 0.2510,
0.7529, 0.1255, 0.2510,
0.2510, 0.6275, 0.2510,
0.7529, 0.6275, 0.2510,
0.2510, 0.1255, 0.7529,
0.7529, 0.1255, 0.7529,
0.2510, 0.6275, 0.7529,
0.7529, 0.6275, 0.7529,
0, 0.3765, 0.2510,
0.5020, 0.3765, 0.2510,
0, 0.8784, 0.2510,
0.5020, 0.8784, 0.2510,
0, 0.3765, 0.7529,
0.5020, 0.3765, 0.7529,
0, 0.8784, 0.7529,
0.5020, 0.8784, 0.7529,
0.2510, 0.3765, 0.2510,
0.7529, 0.3765, 0.2510,
0.2510, 0.8784, 0.2510,
0.7529, 0.8784, 0.2510,
0.2510, 0.3765, 0.7529,
0.7529, 0.3765, 0.7529,
0.2510, 0.8784, 0.7529,
0.7529, 0.8784, 0.7529,
0.1255, 0.1255, 0,
0.6275, 0.1255, 0,
0.1255, 0.6275, 0,
0.6275, 0.6275, 0,
0.1255, 0.1255, 0.5020,
0.6275, 0.1255, 0.5020,
0.1255, 0.6275, 0.5020,
0.6275, 0.6275, 0.5020,
0.3765, 0.1255, 0,
0.8784, 0.1255, 0,
0.3765, 0.6275, 0,
0.8784, 0.6275, 0,
0.3765, 0.1255, 0.5020,
0.8784, 0.1255, 0.5020,
0.3765, 0.6275, 0.5020,
0.8784, 0.6275, 0.5020,
0.1255, 0.3765, 0,
0.6275, 0.3765, 0,
0.1255, 0.8784, 0,
0.6275, 0.8784, 0,
0.1255, 0.3765, 0.5020,
0.6275, 0.3765, 0.5020,
0.1255, 0.8784, 0.5020,
0.6275, 0.8784, 0.5020,
0.3765, 0.3765, 0,
0.8784, 0.3765, 0,
0.3765, 0.8784, 0,
0.8784, 0.8784, 0,
0.3765, 0.3765, 0.5020,
0.8784, 0.3765, 0.5020,
0.3765, 0.8784, 0.5020,
0.8784, 0.8784, 0.5020,
0.1255, 0.1255, 0.2510,
0.6275, 0.1255, 0.2510,
0.1255, 0.6275, 0.2510,
0.6275, 0.6275, 0.2510,
0.1255, 0.1255, 0.7529,
0.6275, 0.1255, 0.7529,
0.1255, 0.6275, 0.7529,
0.6275, 0.6275, 0.7529,
0.3765, 0.1255, 0.2510,
0.8784, 0.1255, 0.2510,
0.3765, 0.6275, 0.2510,
0.8784, 0.6275, 0.2510,
0.3765, 0.1255, 0.7529,
0.8784, 0.1255, 0.7529,
0.3765, 0.6275, 0.7529,
0.8784, 0.6275, 0.7529,
0.1255, 0.3765, 0.2510,
0.6275, 0.3765, 0.2510,
0.1255, 0.8784, 0.2510,
0.6275, 0.8784, 0.2510,
0.1255, 0.3765, 0.7529,
0.6275, 0.3765, 0.7529,
0.1255, 0.8784, 0.7529,
0.6275, 0.8784, 0.7529,
0.3765, 0.3765, 0.2510,
0.8784, 0.3765, 0.2510,
0.3765, 0.8784, 0.2510,
0.8784, 0.8784, 0.2510,
0.3765, 0.3765, 0.7529,
0.8784, 0.3765, 0.7529,
0.3765, 0.8784, 0.7529,
0.8784, 0.8784, 0.7529]

View File

@@ -0,0 +1,144 @@
"""
STP: Simplest Tracker Possible
Author: Jonathon Luiten
This simple tracker assigns track IDs which maximise the 'bounding box IoU' between previous tracks and current
detections. It is also able to match detections to tracks at more than one timestep previously.
"""
import os
import sys
import numpy as np
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'DETECTION_THRESHOLD': 0.5,
'ASSOCIATION_THRESHOLD': 1e-10,
'MAX_FRAMES_SKIP': 7
}
def track_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Where to accumulate output data for writing out
output_data = []
# To ensure IDs are unique per object across all classes.
curr_max_id = 0
# Run tracker for each class.
for cls, cls_data in data.items():
# Initialize container for holding previously tracked objects.
prev = {'boxes': np.empty((0, 4)),
'ids': np.array([], int),
'timesteps': np.array([])}
# Run tracker for each timestep.
for timestep, t_data in enumerate(cls_data):
# Threshold detections.
t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])
# Convert mask dets to bounding boxes.
boxes = butils.masks2boxes(t_data['mask_rles'], t_data['im_hs'], t_data['im_ws'])
# Calculate IoU between previous and current frame dets.
ious = butils.box_iou(prev['boxes'], boxes)
# Score which decreases quickly for previous dets depending on how many timesteps ago they were last matched.
prev_timestep_scores = np.power(10, -1 * prev['timesteps'])
# Matching score is such that it first tries to match 'most recent timesteps',
# and within each timestep maximises IoU.
match_scores = prev_timestep_scores[:, np.newaxis] * ious
# Find best matching between current dets and previous tracks.
match_rows, match_cols = butils.match(match_scores)
# Remove matches that have an IoU below a certain threshold.
actually_matched_mask = ious[match_rows, match_cols] > config['ASSOCIATION_THRESHOLD']
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
# Assign the prev track ID to the current dets if they were matched.
ids = np.nan * np.ones((len(boxes),), int)
ids[match_cols] = prev['ids'][match_rows]
# Create new track IDs for dets that were not matched to previous tracks.
num_not_matched = len(ids) - len(match_cols)
new_ids = np.arange(curr_max_id + 1, curr_max_id + num_not_matched + 1)
ids[np.isnan(ids)] = new_ids
# Update maximum ID to ensure future added tracks have a unique ID value.
curr_max_id += num_not_matched
# Drop tracks from 'previous tracks' if they have not been matched in the last MAX_FRAMES_SKIP frames.
unmatched_rows = [i for i in range(len(prev['ids'])) if
i not in match_rows and (prev['timesteps'][i] + 1 <= config['MAX_FRAMES_SKIP'])]
# Update the set of previous tracking results to include the newly tracked detections.
prev['ids'] = np.concatenate((ids, prev['ids'][unmatched_rows]), axis=0)
prev['boxes'] = np.concatenate((np.atleast_2d(boxes), np.atleast_2d(prev['boxes'][unmatched_rows])), axis=0)
prev['timesteps'] = np.concatenate((np.zeros((len(ids),)), prev['timesteps'][unmatched_rows] + 1), axis=0)
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(ids[i]), cls, t_data['scores'][i], t_data['im_hs'][i], t_data['im_ws'][i],
t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(track_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
track_sequence(seq_todo)
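A small numeric sketch (editorial addition, not part of the original file) of how the per-timestep weighting in track_sequence makes the assignment prefer recently seen tracks; the IoU values are made up and trackeval is assumed importable:

import numpy as np
from trackeval.baselines import baseline_utils as butils

ious = np.array([[0.3, 0.0], [0.9, 0.8]])   # rows: previous tracks A and B; cols: the two current dets
prev_timesteps = np.array([0., 1.])          # A was matched in the previous frame, B two frames ago
match_scores = np.power(10, -1 * prev_timesteps)[:, np.newaxis] * ious   # [[0.3, 0.0], [0.09, 0.08]]
rows, cols = butils.match(match_scores)
# rows -> [0 1], cols -> [0 1]: det 0 goes to the recent track A (0.3 beats 0.09) and det 1 to track B.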

View File

@@ -0,0 +1,92 @@
"""
Thresholder
Author: Jonathon Luiten
Simply reads in a set of detections, thresholds them at a certain score threshold, and writes them out again.
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
THRESHOLD = 0.2
code_path = get_code_path()
config = {
'INPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/non_overlap_supplied/data/'),
'OUTPUT_FOL': os.path.join(code_path, 'data/detections/rob_mots/{split}/threshold_' + str(100*THRESHOLD) + '/data/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
'DETECTION_THRESHOLD': THRESHOLD,
}
def do_sequence(seq_file):
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Where to accumulate output data for writing out
output_data = []
# Run for each class.
for cls, cls_data in data.items():
# Run for each timestep.
for timestep, t_data in enumerate(cls_data):
# Threshold detections.
t_data = butils.threshold(t_data, config['DETECTION_THRESHOLD'])
# Save result in output format to write to file later.
# Output Format = [timestep ID class score im_h im_w mask_RLE]
for i in range(len(t_data['ids'])):
row = [timestep, int(t_data['ids'][i]), cls, t_data['scores'][i], t_data['im_hs'][i],
t_data['im_ws'][i], t_data['mask_rles'][i]]
output_data.append(row)
# Write results to file
out_file = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT']),
config['OUTPUT_FOL'].format(split=config['SPLIT']))
butils.write_seq(output_data, out_file)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = os.path.join(config['INPUT_FOL'].format(split=config['SPLIT']), bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,94 @@
"""
Visualize: Code which converts .txt RLE tracking results into a visual .png format.
Author: Jonathon Luiten
"""
import os
import sys
from multiprocessing.pool import Pool
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from trackeval.baselines import baseline_utils as butils
from trackeval.utils import get_code_path
from trackeval.datasets.rob_mots_classmap import cls_id_to_name
code_path = get_code_path()
config = {
# Tracker format:
'INPUT_FOL': os.path.join(code_path, 'data/trackers/rob_mots/{split}/STP/data/{bench}'),
'OUTPUT_FOL': os.path.join(code_path, 'data/viz/rob_mots/{split}/STP/data/{bench}'),
# GT format:
# 'INPUT_FOL': os.path.join(code_path, 'data/gt/rob_mots/{split}/{bench}/data/'),
# 'OUTPUT_FOL': os.path.join(code_path, 'data/gt_viz/rob_mots/{split}/{bench}/'),
'SPLIT': 'train', # valid: 'train', 'val', 'test'.
'Benchmarks': None, # If None, all benchmarks in SPLIT.
'Num_Parallel_Cores': None, # If None, run without parallel.
}
def do_sequence(seq_file):
# Folder to save resulting visualization in
out_fol = seq_file.replace(config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench),
config['OUTPUT_FOL'].format(split=config['SPLIT'], bench=bench)).replace('.txt', '')
# Load input data from file (e.g. provided detections)
# data format: data['cls'][t] = {'ids', 'scores', 'im_hs', 'im_ws', 'mask_rles'}
data = butils.load_seq(seq_file)
# Get frame size for visualizing empty frames
im_h, im_w = butils.get_frame_size(data)
# First run for each class.
for cls, cls_data in data.items():
if cls >= 100:
continue
# Run for each timestep.
for timestep, t_data in enumerate(cls_data):
# Save out visualization
out_file = os.path.join(out_fol, cls_id_to_name[cls], str(timestep).zfill(5) + '.png')
butils.save_as_png(t_data, out_file, im_h, im_w)
# Then run for all classes combined
# Converts data from a class-separated to a class-combined format.
data = butils.combine_classes(data)
# Run for each timestep.
for timestep, t_data in enumerate(data):
# Save out visualization
out_file = os.path.join(out_fol, 'all_classes', str(timestep).zfill(5) + '.png')
butils.save_as_png(t_data, out_file, im_h, im_w)
print('DONE:', seq_file)
if __name__ == '__main__':
# Required to fix bug in multiprocessing on windows.
freeze_support()
# Obtain list of sequences to run tracker for.
if config['Benchmarks']:
benchmarks = config['Benchmarks']
else:
benchmarks = ['davis_unsupervised', 'kitti_mots', 'youtube_vis', 'ovis', 'bdd_mots', 'tao']
if config['SPLIT'] != 'train':
benchmarks += ['waymo', 'mots_challenge']
seqs_todo = []
for bench in benchmarks:
bench_fol = config['INPUT_FOL'].format(split=config['SPLIT'], bench=bench)
seqs_todo += [os.path.join(bench_fol, seq) for seq in os.listdir(bench_fol)]
# Run in parallel
if config['Num_Parallel_Cores']:
with Pool(config['Num_Parallel_Cores']) as pool:
results = pool.map(do_sequence, seqs_todo)
# Run in series
else:
for seq_todo in seqs_todo:
do_sequence(seq_todo)

View File

@@ -0,0 +1,15 @@
from .kitti_2d_box import Kitti2DBox
from .kitti_mots import KittiMOTS
from .mot_challenge_2d_box import MotChallenge2DBox
from .mots_challenge import MOTSChallenge
from .bdd100k import BDD100K
from .davis import DAVIS
from .tao import TAO
from .tao_ow import TAO_OW
from .burst import BURST
from .burst_ow import BURST_OW
from .youtube_vis import YouTubeVIS
from .head_tracking_challenge import HeadTrackingChallenge
from .rob_mots import RobMOTS
from .person_path_22 import PersonPath22
from .visdrone import VisDrone2DBox

View File

@@ -0,0 +1,326 @@
import csv
import io
import zipfile
import os
import traceback
import numpy as np
from copy import deepcopy
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException
class _BaseDataset(ABC):
@abstractmethod
def __init__(self):
self.tracker_list = None
self.seq_list = None
self.class_list = None
self.output_fol = None
self.output_sub_fol = None
self.should_classes_combine = True
self.use_super_categories = False
# Functions to implement:
@staticmethod
@abstractmethod
def get_default_dataset_config():
...
@abstractmethod
def _load_raw_file(self, tracker, seq, is_gt):
...
@_timing.time
@abstractmethod
def get_preprocessed_seq_data(self, raw_data, cls):
...
@abstractmethod
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
...
# Helper functions for all datasets:
@classmethod
def get_class_name(cls):
return cls.__name__
def get_name(self):
return self.get_class_name()
def get_output_fol(self, tracker):
return os.path.join(self.output_fol, tracker, self.output_sub_fol)
def get_display_name(self, tracker):
""" Can be overwritten if the trackers name (in files) is different to how it should be displayed.
By default this method just returns the trackers name as is.
"""
return tracker
def get_eval_info(self):
"""Return info about the dataset needed for the Evaluator"""
return self.tracker_list, self.seq_list, self.class_list
@_timing.time
def get_raw_seq_data(self, tracker, seq):
""" Loads raw data (tracker and ground-truth) for a single tracker on a single sequence.
Raw data includes all of the information needed for both preprocessing and evaluation, for all classes.
A later function (get_processed_seq_data) will perform such preprocessing and extract relevant information for
the evaluation of each class.
This returns a dict which contains the fields:
[num_timesteps]: integer
[gt_ids, tracker_ids, gt_classes, tracker_classes, tracker_confidences]:
list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
[gt_extras]: dict (for each extra) of lists (for each timestep) of 1D NDArrays (for each det).
gt_extras contains dataset specific information used for preprocessing such as occlusion and truncation levels.
Note that similarities are extracted as part of the dataset and not the metric, because almost all metrics are
independent of the exact method of calculating the similarity. However datasets are not (e.g. segmentation
masks vs 2D boxes vs 3D boxes).
We calculate the similarity before preprocessing because often both preprocessing and evaluation require it and
we don't wish to calculate this twice.
We calculate similarity between all gt and tracker classes (not just each class individually) to allow for
calculation of metrics such as class confusion matrices. Typically the impact of this on performance is low.
"""
# Load raw data.
raw_gt_data = self._load_raw_file(tracker, seq, is_gt=True)
raw_tracker_data = self._load_raw_file(tracker, seq, is_gt=False)
raw_data = {**raw_tracker_data, **raw_gt_data} # Merges dictionaries
# Calculate similarities for each timestep.
similarity_scores = []
for t, (gt_dets_t, tracker_dets_t) in enumerate(zip(raw_data['gt_dets'], raw_data['tracker_dets'])):
ious = self._calculate_similarities(gt_dets_t, tracker_dets_t)
similarity_scores.append(ious)
raw_data['similarity_scores'] = similarity_scores
return raw_data
@staticmethod
def _load_simple_text_file(file, time_col=0, id_col=None, remove_negative_ids=False, valid_filter=None,
crowd_ignore_filter=None, convert_filter=None, is_zipped=False, zip_file=None,
force_delimiters=None):
""" Function that loads data which is in a commonly used text file format.
Assumes each det is given by one row of a text file.
There is no limit to the number or meaning of each column,
however one column needs to give the timestep of each det (time_col), which defaults to column 0.
The file dialect (delimiter, number of columns, etc.) is determined automatically.
This function automatically separates dets by timestep,
and is much faster than alternatives such as np.loadtxt or pandas.
If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
These are not excluded from ignore data.
valid_filter can be used to only include certain classes.
It is a dict with ints as keys, and lists as values,
such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
If None, all classes are included.
crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid filter.
convert_filter can be used to convert value read to another format.
This is used most commonly to convert classes given as string to a class id.
This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.
Optionally, input files could be a zip of multiple text files for storage efficiency.
Returns read_data and ignore_data.
Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
Note that all data is returned as strings, and must be converted to float/int later if needed.
Note that timesteps will not be present in the returned dict keys if there are no dets for them
"""
if remove_negative_ids and id_col is None:
raise TrackEvalException('remove_negative_ids is True, but id_col is not given.')
if crowd_ignore_filter is None:
crowd_ignore_filter = {}
if convert_filter is None:
convert_filter = {}
try:
if is_zipped: # Either open file directly or within a zip.
if zip_file is None:
raise TrackEvalException('is_zipped set to True, but no zip_file is given.')
archive = zipfile.ZipFile(os.path.join(zip_file), 'r')
fp = io.TextIOWrapper(archive.open(file, 'r'))
else:
fp = open(file)
read_data = {}
crowd_ignore_data = {}
fp.seek(0, os.SEEK_END)
# check if file is empty
if fp.tell():
fp.seek(0)
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=force_delimiters) # Auto determine structure.
dialect.skipinitialspace = True # Deal with extra spaces between columns
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
try:
# Deal with extra trailing spaces at the end of rows
if row[-1] == '':
row = row[:-1]
timestep = str(int(float(row[time_col])))
# Read ignore regions separately.
is_ignored = False
for ignore_key, ignore_value in crowd_ignore_filter.items():
if row[ignore_key].lower() in ignore_value:
# Convert values in one column (e.g. string to id)
for convert_key, convert_value in convert_filter.items():
row[convert_key] = convert_value[row[convert_key].lower()]
# Save data separated by timestep.
if timestep in crowd_ignore_data.keys():
crowd_ignore_data[timestep].append(row)
else:
crowd_ignore_data[timestep] = [row]
is_ignored = True
if is_ignored: # if det is an ignore region, it cannot be a normal det.
continue
# Exclude some dets if not valid.
if valid_filter is not None:
for key, value in valid_filter.items():
if row[key].lower() not in value:
continue
if remove_negative_ids:
if int(float(row[id_col])) < 0:
continue
# Convert values in one column (e.g. string to id)
for convert_key, convert_value in convert_filter.items():
row[convert_key] = convert_value[row[convert_key].lower()]
# Save data separated by timestep.
if timestep in read_data.keys():
read_data[timestep].append(row)
else:
read_data[timestep] = [row]
except Exception:
exc_str_init = 'In file %s the following line cannot be read correctly: \n' % os.path.basename(
file)
exc_str = ' '.join([exc_str_init]+row)
raise TrackEvalException(exc_str)
fp.close()
except Exception:
print('Error loading file: %s, printing traceback.' % file)
traceback.print_exc()
raise TrackEvalException(
'File %s cannot be read because it is either not present or invalidly formatted' % os.path.basename(
file))
return read_data, crowd_ignore_data
@staticmethod
def _calculate_mask_ious(masks1, masks2, is_encoded=False, do_ioa=False):
""" Calculates the IOU (intersection over union) between two arrays of segmentation masks.
If is_encoded a run length encoding with pycocotools is assumed as input format, otherwise an input of numpy
arrays of the shape (num_masks, height, width) is assumed and the encoding is performed.
If do_ioa (intersection over area), then calculates the intersection over the area of masks1 - this is commonly
used to determine if detections are within crowd ignore region.
:param masks1: first set of masks (numpy array of shape (num_masks, height, width) if not encoded,
else pycocotools rle encoded format)
:param masks2: second set of masks (numpy array of shape (num_masks, height, width) if not encoded,
else pycocotools rle encoded format)
:param is_encoded: whether the input is in pycocotools rle encoded format
:param do_ioa: whether to perform IoA computation
:return: the IoU/IoA scores
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# use pycocotools for run length encoding of masks
if not is_encoded:
masks1 = mask_utils.encode(np.array(np.transpose(masks1, (1, 2, 0)), order='F'))
masks2 = mask_utils.encode(np.array(np.transpose(masks2, (1, 2, 0)), order='F'))
# use pycocotools for iou computation of rle encoded masks
ious = mask_utils.iou(masks1, masks2, [do_ioa]*len(masks2))
if len(masks1) == 0 or len(masks2) == 0:
ious = np.asarray(ious).reshape(len(masks1), len(masks2))
assert (ious >= 0 - np.finfo('float').eps).all()
assert (ious <= 1 + np.finfo('float').eps).all()
return ious
@staticmethod
def _calculate_box_ious(bboxes1, bboxes2, box_format='xywh', do_ioa=False):
""" Calculates the IOU (intersection over union) between two arrays of boxes.
Allows variable box formats ('xywh' and 'x0y0x1y1').
If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
used to determine if detections are within crowd ignore region.
"""
if box_format in 'xywh':
# layout: (x0, y0, w, h)
bboxes1 = deepcopy(bboxes1)
bboxes2 = deepcopy(bboxes2)
bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
elif box_format not in 'x0y0x1y1':
raise (TrackEvalException('box_format %s is not implemented' % box_format))
# layout: (x0, y0, x1, y1)
min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(min_[..., 3] - max_[..., 1], 0)
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
if do_ioa:
ioas = np.zeros_like(intersection)
valid_mask = area1 > 0 + np.finfo('float').eps
ioas[valid_mask, :] = intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
return ioas
else:
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
intersection[area1 <= 0 + np.finfo('float').eps, :] = 0
intersection[:, area2 <= 0 + np.finfo('float').eps] = 0
intersection[union <= 0 + np.finfo('float').eps] = 0
union[union <= 0 + np.finfo('float').eps] = 1
ious = intersection / union
return ious
@staticmethod
def _calculate_euclidean_similarity(dets1, dets2, zero_distance=2.0):
""" Calculates the euclidean distance between two sets of detections, and then converts this into a similarity
measure with values between 0 and 1 using the following formula: sim = max(0, 1 - dist/zero_distance).
The default zero_distance of 2.0 corresponds to the default used in MOT15_3D, such that a 0.5 similarity
threshold corresponds to a 1m distance threshold for TPs.
"""
dist = np.linalg.norm(dets1[:, np.newaxis]-dets2[np.newaxis, :], axis=2)
sim = np.maximum(0, 1 - dist/zero_distance)
return sim
@staticmethod
def _check_unique_ids(data, after_preproc=False):
"""Check the requirement that the tracker_ids and gt_ids are unique per timestep"""
gt_ids = data['gt_ids']
tracker_ids = data['tracker_ids']
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(gt_ids, tracker_ids)):
if len(tracker_ids_t) > 0:
unique_ids, counts = np.unique(tracker_ids_t, return_counts=True)
if np.max(counts) != 1:
duplicate_ids = unique_ids[counts > 1]
exc_str_init = 'Tracker predicts the same ID more than once in a single timestep ' \
'(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
if after_preproc:
exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
'so ids may not be as in file, and something seems wrong with preproc.'
raise TrackEvalException(exc_str)
if len(gt_ids_t) > 0:
unique_ids, counts = np.unique(gt_ids_t, return_counts=True)
if np.max(counts) != 1:
duplicate_ids = unique_ids[counts > 1]
exc_str_init = 'Ground-truth has the same ID more than once in a single timestep ' \
'(seq: %s, frame: %i, ids:' % (data['seq'], t+1)
exc_str = ' '.join([exc_str_init] + [str(d) for d in duplicate_ids]) + ')'
if after_preproc:
exc_str_init += '\n Note that this error occurred after preprocessing (but not before), ' \
'so ids may not be as in file, and something seems wrong with preproc.'
raise TrackEvalException(exc_str)
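Two tiny numeric sketches (editorial additions, not part of the original file) for the similarity helpers above; _BaseDataset is abstract, but these helpers are plain staticmethods and can be called on the class directly. The masks and points are made up:

import numpy as np
from trackeval.datasets._base_dataset import _BaseDataset

# Mask IoU / IoA: un-encoded masks are (num_masks, height, width) uint8 arrays.
m1 = np.zeros((1, 4, 4), dtype=np.uint8); m1[0, :, :2] = 1    # left half of a 4x4 frame (8 pixels)
m2 = np.ones((1, 4, 4), dtype=np.uint8)                       # the whole frame (16 pixels)
print(_BaseDataset._calculate_mask_ious(m1, m2))               # [[0.5]]  8 / 16
print(_BaseDataset._calculate_mask_ious(m1, m2, do_ioa=True))  # [[1.0]]  8 / area of m1
# Euclidean similarity: with the default zero_distance of 2.0, a 1m gap gives similarity 0.5.
print(_BaseDataset._calculate_euclidean_similarity(np.array([[0., 0., 0.]]), np.array([[1., 0., 0.]])))  # [[0.5]]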

View File

@@ -0,0 +1,302 @@
import os
import json
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class BDD100K(_BaseDataset):
"""Dataset class for BDD100K tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/bdd100k/bdd100k_val'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/bdd100k/bdd100k_val'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle'],
# Valid: ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
'SPLIT_TO_EVAL': 'val', # Valid: 'training', 'val',
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = True
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian', 'rider', 'car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [pedestrian, rider, car, '
'bus, truck, train, motorcycle, bicycle] are valid.')
self.super_categories = {"HUMAN": [cls for cls in ["pedestrian", "rider"] if cls in self.class_list],
"VEHICLE": [cls for cls in ["car", "truck", "bus", "train"] if cls in self.class_list],
"BIKE": [cls for cls in ["motorcycle", "bicycle"] if cls in self.class_list]}
self.distractor_classes = ['other person', 'trailer', 'other vehicle']
self.class_name_to_class_id = {'pedestrian': 1, 'rider': 2, 'other person': 3, 'car': 4, 'bus': 5, 'truck': 6,
'train': 7, 'trailer': 8, 'other vehicle': 9, 'motorcycle': 10, 'bicycle': 11}
# Get sequences to eval
self.seq_list = []
self.seq_lengths = {}
self.seq_list = [seq_file.replace('.json', '') for seq_file in os.listdir(self.gt_fol)]
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the BDD100K format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if is_gt:
file = os.path.join(self.gt_fol, seq + '.json')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.json')
with open(file) as f:
data = json.load(f)
# sort data by frame index
data = sorted(data, key=lambda x: x['index'])
# check sequence length
if is_gt:
self.seq_lengths[seq] = len(data)
num_timesteps = len(data)
else:
num_timesteps = self.seq_lengths[seq]
if num_timesteps != len(data):
raise TrackEvalException('Number of ground truth and tracker timesteps do not match for sequence %s'
% seq)
# Convert data to required format
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
ig_ids = []
keep_ids = []
for i in range(len(data[t]['labels'])):
ann = data[t]['labels'][i]
if is_gt and (ann['category'] in self.distractor_classes or 'attributes' in ann.keys()
and ann['attributes']['Crowd']):
ig_ids.append(i)
else:
keep_ids.append(i)
if keep_ids:
raw_data['dets'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
data[t]['labels'][i]['box2d']['y1'],
data[t]['labels'][i]['box2d']['x2'],
data[t]['labels'][i]['box2d']['y2']
] for i in keep_ids]).astype(float)
raw_data['ids'][t] = np.atleast_1d([data[t]['labels'][i]['id'] for i in keep_ids]).astype(int)
raw_data['classes'][t] = np.atleast_1d([self.class_name_to_class_id[data[t]['labels'][i]['category']]
for i in keep_ids]).astype(int)
else:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if ig_ids:
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d([[data[t]['labels'][i]['box2d']['x1'],
data[t]['labels'][i]['box2d']['y1'],
data[t]['labels'][i]['box2d']['x2'],
data[t]['labels'][i]['box2d']['y2']
] for i in ig_ids]).astype(float)
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4)).astype(float)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
BDD100K:
In BDD100K, the 4 preproc steps are as follow:
1) There are eight classes (pedestrian, rider, car, bus, truck, train, motorcycle, bicycle)
which are evaluated separately.
2) For BDD100K there is no removal of matched tracker dets.
3) Crowd ignore regions are used to remove unmatched detections.
4) No removal of gt dets.
"""
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
box_format='x0y0x1y1', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps,
axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_crowd_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
return similarity_scores
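A hedged usage sketch (editorial addition, not part of the original file): constructing the dataset directly with a partial config. The paths are placeholders and must point at BDD100K-format json folders as described above; unset keys fall back to get_default_dataset_config().

from trackeval.datasets import BDD100K

dataset = BDD100K({'GT_FOLDER': '/path/to/gt/bdd100k/bdd100k_val',
                   'TRACKERS_FOLDER': '/path/to/trackers/bdd100k/bdd100k_val',
                   'CLASSES_TO_EVAL': ['car', 'pedestrian']})
raw = dataset.get_raw_seq_data(dataset.tracker_list[0], dataset.seq_list[0])
car_data = dataset.get_preprocessed_seq_data(raw, 'car')   # preprocessed per-class data ready for metrics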

View File

@@ -0,0 +1,49 @@
import os
from .burst_helpers.burst_base import BURSTBase
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils
class BURST(BURSTBase):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
tao_config = BURSTBase.get_default_dataset_config()
code_path = utils.get_code_path()
# e.g. 'data/gt/tsunami/exemplar_guided/'
tao_config['GT_FOLDER'] = os.path.join(
code_path, 'data/gt/burst/val/') # Location of GT data
# e.g. 'data/trackers/tsunami/exemplar_guided/mask_guided/validation/'
tao_config['TRACKERS_FOLDER'] = os.path.join(
code_path, 'data/trackers/burst/class-guided/') # Trackers location
# set to True or False
tao_config['EXEMPLAR_GUIDED'] = False
return tao_config
def _iou_type(self):
return 'mask'
def _box_or_mask_from_det(self, det):
return det['segmentation']
def _calculate_area_for_ann(self, ann):
import pycocotools.mask as cocomask
return cocomask.area(ann["segmentation"])
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _is_exemplar_guided(self):
exemplar_guided = self.config['EXEMPLAR_GUIDED']
return exemplar_guided
def _postproc_ground_truth_data(self, data):
return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()
def _postproc_prediction_data(self, data):
return PredictionBURSTFormatToTAOFormatConverter(
self.gt_data, data,
exemplar_guided=self._is_exemplar_guided()).convert()

View File

@@ -0,0 +1,7 @@
The track ids in both ground truth and predictions are not globally unique, but
start from 1 for each video. At the moment when converting from Ali format to
TAO format, we remap the ids to be globally unique. It would be better to
directly have this in the data though.
Improve setting of the EXEMPLAR_GUIDED flag; maybe this can be done automatically.

View File

@@ -0,0 +1,591 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from trackeval.utils import TrackEvalException
from trackeval.datasets._base_dataset import _BaseDataset
from trackeval import utils
from trackeval import _timing
class BURSTBase(_BaseDataset):
"""Dataset class for TAO tracking"""
def _postproc_ground_truth_data(self, data):
return data
def _postproc_prediction_data(self, data):
return data
def _iou_type(self):
return 'bbox'
def _box_or_mask_from_det(self, det):
return np.atleast_1d(det['bbox'])
def _calculate_area_for_ann(self, ann):
return ann["bbox"][2] * ann["bbox"][3]
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'EXEMPLAR_GUIDED': False,
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = self._postproc_ground_truth_data(json.load(f))
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# Only classes with ground truth are evaluated in TAO; we also don't evaluate distractors.
distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if (cls['id'] in seen_cats) and (cls['id'] not in distractors)]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
self.class_list = [cls for cls in self.valid_classes]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = self._postproc_prediction_data(json.load(f))
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
+ self.seq_to_classes[seq_id]['neg_cat_ids']
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
if cls in classes_to_consider else [] for cls in all_classes}
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
if not self.config['EXEMPLAR_GUIDED']:
# Match tracker and gt dets (with hungarian algorithm).
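# Scores below a 0.5 IoU are zeroed first, so the assignment can only match pairs with
# sufficient overlap; tracker detections left unmatched afterwards become candidates
# for removal in the branch below.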
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
else:
data['tracker_ids'][t] = tracker_ids
data['tracker_dets'][t] = tracker_dets
data['tracker_confidences'][t] = tracker_confidences
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
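# gt_id_map / tracker_id_map are lookup arrays indexed by the original id: unused slots
# stay NaN, used slots hold the new contiguous id assigned via np.arange below.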
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = self._iou_type()
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = self._calculate_area_for_ann(ann)
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann["category_id"],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute the average area over the track's annotations
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
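# e.g. a video whose images have frame_index 0, 5 and 10 maps those images to timesteps 0, 1 and 2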
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled in place
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
#print('true')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
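# Minimal illustrative sketch (not part of TrackEval itself) of the TAO-style ground
# truth records consumed by the loader above. Field names follow the code in this file;
# the ids and bbox values are invented purely for illustration.
if __name__ == '__main__':
    toy_gt = {
        'videos': [{'id': 1, 'name': 'val/seq-0001', 'neg_category_ids': [],
                    'not_exhaustive_category_ids': []}],
        'images': [{'id': 10, 'video_id': 1, 'frame_index': 0},
                   {'id': 11, 'video_id': 1, 'frame_index': 1}],
        'annotations': [{'image_id': 10, 'video_id': 1, 'track_id': 5,
                         'category_id': 1, 'bbox': [100, 50, 40, 80]}],
        'tracks': [{'id': 5, 'video_id': 1, 'category_id': 1}],
        'categories': [{'id': 1, 'name': 'person'}],
    }
    # Sequence lengths are simply the number of annotated images per video, exactly as
    # computed in __init__ above.
    seq_lengths = {}
    for img in toy_gt['images']:
        seq_lengths[img['video_id']] = seq_lengths.get(img['video_id'], 0) + 1
    print(seq_lengths)  # -> {1: 2}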

View File

@@ -0,0 +1,675 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from trackeval.utils import TrackEvalException
from trackeval.datasets._base_dataset import _BaseDataset
from trackeval import utils
from trackeval import _timing
class BURST_OW_Base(_BaseDataset):
"""Dataset class for TAO tracking"""
def _postproc_ground_truth_data(self, data):
return data
def _postproc_prediction_data(self, data):
return data
def _iou_type(self):
return 'bbox'
def _box_or_mask_from_det(self, det):
return np.atleast_1d(det['bbox'])
def _calculate_area_for_ann(self, ann):
return ann["bbox"][2] * ann["bbox"][3]
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'SUBSET': 'all'
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = self._postproc_ground_truth_data(json.load(f))
self.subset = self.config['SUBSET']
if self.subset != 'all':
# Split GT data into `known`, `unknown` or `distractor`
self._split_known_unknown_distractor()
self.gt_data = self._filter_gt_data(self.gt_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
# cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
# self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
# for cls in self.config['CLASSES_TO_EVAL']]
self.class_list = ["object"] # class-agnostic
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
# self.class_list = [cls for cls in self.valid_classes]
self.class_list = ["object"] # class-agnostic
# self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
self.class_name_to_class_id = {"object": 1} # class-agnostic
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = self._postproc_prediction_data(json.load(f))
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int) # class-agnostic
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
# all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
all_classes = [1] # class-agnostic
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
# classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
# + self.seq_to_classes[seq_id]['neg_cat_ids']
classes_to_consider = all_classes # class-agnostic
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
# classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
# if cls in classes_to_consider else [] for cls in all_classes}
classes_to_tracks = {cls: [track for track in all_tracks]
if cls in classes_to_consider else [] for cls in all_classes} # class-agnostic
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: self._box_or_mask_from_det(det)
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = self._iou_type()
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = self._calculate_area_for_ann(ann)
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann["category_id"],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute the average area over the track's annotations
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled in place
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
#print('true')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
def _split_known_unknown_distractor(self):
all_ids = set(range(1, 2000))  # 2000 is larger than the max category id in TAO-OW.
# `knowns` includes 78 TAO_category_ids that correspond to 78 COCO classes.
# (The other 2 COCO classes do not have corresponding classes in TAO).
self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
# `distractors` is defined as in the paper "Opening up Open-World Tracking"
self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
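# knowns, distractors and unknowns together partition the TAO-OW category ids 1..1999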
def _filter_gt_data(self, raw_gt_data):
"""
Filter out irrelevant data in the raw_gt_data
Args:
raw_gt_data: directly loaded from json.
Returns:
filtered gt_data
"""
valid_cat_ids = list()
if self.subset == "known":
valid_cat_ids = self.knowns
elif self.subset == "distractor":
valid_cat_ids = self.distractors
elif self.subset == "unknown":
valid_cat_ids = self.unknowns
# elif self.subset == "test_only_unknowns":
# valid_cat_ids = test_only_unknowns
else:
raise Exception("The parameter `SUBSET` is incorrect")
filtered = dict()
filtered["videos"] = raw_gt_data["videos"]
# filtered["videos"] = list()
unwanted_vid = set()
# for video in raw_gt_data["videos"]:
# datasrc = video["name"].split('/')[1]
# if datasrc in data_srcs:
# filtered["videos"].append(video)
# else:
# unwanted_vid.add(video["id"])
filtered["annotations"] = list()
for ann in raw_gt_data["annotations"]:
if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
filtered["annotations"].append(ann)
filtered["tracks"] = list()
for track in raw_gt_data["tracks"]:
if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
filtered["tracks"].append(track)
filtered["images"] = list()
for image in raw_gt_data["images"]:
if image["video_id"] not in unwanted_vid:
filtered["images"].append(image)
filtered["categories"] = list()
for cat in raw_gt_data["categories"]:
if cat["id"] in valid_cat_ids:
filtered["categories"].append(cat)
if "info" in raw_gt_data:
filtered["info"] = raw_gt_data["info"]
if "licenses" in raw_gt_data:
filtered["licenses"] = raw_gt_data["licenses"]
if "track_id_offsets" in raw_gt_data:
filtered["track_id_offsets"] = raw_gt_data["track_id_offsets"]
if "split" in raw_gt_data:
filtered["split"] = raw_gt_data["split"]
return filtered
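# Illustrative usage sketch (not part of the original file): the static helper
# _make_track_ids_unique can be exercised on toy annotations to show how a track id
# that is reused across two different videos is split into fresh, globally unique ids.
if __name__ == '__main__':
    toy_anns = [
        {'track_id': 1, 'video_id': 'vid_a'},
        {'track_id': 1, 'video_id': 'vid_b'},  # same local id, different video
        {'track_id': 2, 'video_id': 'vid_a'},
    ]
    n_updated = BURST_OW_Base._make_track_ids_unique(toy_anns)
    print(n_updated)                          # -> 1 (one id had to be re-assigned)
    print([a['track_id'] for a in toy_anns])  # -> [3, 4, 2]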

View File

@@ -0,0 +1,39 @@
import json
import argparse
from .format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
def main(args):
with open(args.gt_input_file) as f:
ali_format_gt = json.load(f)
tao_format_gt = GroundTruthBURSTFormatToTAOFormatConverter(
ali_format_gt, args.split).convert()
with open(args.gt_output_file, 'w') as f:
json.dump(tao_format_gt, f)
if args.pred_input_file is None:
return
with open(args.pred_input_file) as f:
ali_format_pred = json.load(f)
tao_format_pred = PredictionBURSTFormatToTAOFormatConverter(
tao_format_gt, ali_format_pred, args.split,
args.exemplar_guided).convert()
with open(args.pred_output_file, 'w') as f:
json.dump(tao_format_pred, f)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--gt_input_file', type=str,
default='../data/gt/tsunami/exemplar_guided/validation_all_annotations.json')
parser.add_argument('--gt_output_file', type=str,
default='/tmp/val_gt.json')
parser.add_argument('--pred_input_file', type=str,
default='../data/trackers/tsunami/exemplar_guided/STCN_off_the_shelf/data/results.json')
parser.add_argument('--pred_output_file', type=str,
default='/tmp/pred.json')
parser.add_argument('--split', type=str, default='validation')
parser.add_argument('--exemplar_guided', type=bool, default=True)
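# NOTE: argparse's type=bool treats any non-empty string (including 'False') as True,
# so this flag can effectively only be disabled by passing an empty string.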
args_ = parser.parse_args()
main(args_)

View File

@@ -0,0 +1,259 @@
import os
import json
import pycocotools.mask as cocomask
from tabulate import tabulate
from typing import Union
def _global_track_id(*, local_track_id: Union[str, int],
video_id: Union[str, int],
track_id_mapping) -> int:
# remap local track ids into globally unique ids
return track_id_mapping[str(video_id)][str(local_track_id)]
class GroundTruthBURSTFormatToTAOFormatConverter:
def __init__(self, ali_format):
self._ali_format = ali_format
self._split = ali_format['split']
self._categories = self._make_categories()
self._videos = []
self._annotations = []
self._tracks = {}
self._images = []
self._next_img_id = 0
self._next_ann_id = 0
self._track_id_mapping = self._load_track_id_mapping()
for seq in ali_format['sequences']:
self._visit_seq(seq)
def _load_track_id_mapping(self):
id_map = {}
next_global_track_id = 1
for seq in self._ali_format['sequences']:
seq_id = seq['id']
seq_id_map = {}
id_map[str(seq_id)] = seq_id_map
for local_track_id in seq['track_category_ids']:
seq_id_map[str(local_track_id)] = next_global_track_id
next_global_track_id += 1
return id_map
def global_track_id(self, *, local_track_id: Union[str, int],
video_id: Union[str, int]) -> int:
return _global_track_id(local_track_id=local_track_id,
video_id=video_id,
track_id_mapping=self._track_id_mapping)
def _visit_seq(self, seq):
self._make_video(seq)
imgs = self._make_images(seq)
self._make_annotations_and_tracks(seq, imgs)
def _make_images(self, seq):
imgs = []
for img_path in seq['annotated_image_paths']:
video = self._split + '/' + seq['dataset'] + '/' + seq['seq_name']
file_name = video + '/' + img_path
# TODO: once python 3.9 is more common, we can use this nicer and safer code
#stripped = img_path.removesuffix('.jpg').removesuffix('.png').removeprefix('frame')
stripped = img_path.replace('.jpg', '').replace('.png', '').replace('frame', '')
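# e.g. 'frame0481.jpg' -> '0481' -> 481, or 'seq_000123.png' -> '000123' -> 123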
last = stripped.split('_')[-1]
frame_idx = int(last)
img = {'id': self._next_img_id, 'video': video,
'width': seq['width'], 'height': seq['height'],
'file_name': file_name,
'frame_index': frame_idx,
'video_id': seq['id']}
self._next_img_id += 1
self._images.append(img)
imgs.append(img)
return imgs
def _make_video(self, seq):
video_id = seq['id']
dataset = seq['dataset']
seq_name = seq['seq_name']
name = f'{self._split}/' + dataset + '/' + seq_name
video = {
'id': video_id, 'width': seq['width'], 'height': seq['height'],
'neg_category_ids': seq['neg_category_ids'],
'not_exhaustive_category_ids': seq['not_exhaustive_category_ids'],
'name': name, 'metadata': {'dataset': dataset}}
self._videos.append(video)
def _make_annotations_and_tracks(self, seq, imgs):
video_id = seq['id']
segs = seq['segmentations']
assert len(segs) == len(imgs), (len(segs), len(imgs))
for frame_segs, img in zip(segs, imgs):
for local_track_id, seg in frame_segs.items():
distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
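# same distractor category ids as used by the dataset classes above; annotations whose
# category is one of these are skipped entirely below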
global_track_id = self.global_track_id(
local_track_id=local_track_id, video_id=seq['id'])
rle = seg['rle']
segmentation = {'counts': rle,
'size': [img['height'], img['width']]}
image_id = img['id']
category_id = int(seq['track_category_ids'][local_track_id])
if category_id in distractors:
continue
coco_bbox = cocomask.toBbox(segmentation)
bbox = [int(x) for x in coco_bbox]
ann = {'segmentation': segmentation, 'id': self._next_ann_id,
'image_id': image_id, 'category_id': category_id,
'track_id': global_track_id, 'video_id': video_id,
'bbox': bbox}
self._next_ann_id += 1
self._annotations.append(ann)
if global_track_id not in self._tracks:
track = {'id': global_track_id, 'category_id': category_id,
'video_id': video_id}
self._tracks[global_track_id] = track
def convert(self):
tracks = sorted(self._tracks.values(), key=lambda t: t['id'])
return {'videos': self._videos, 'annotations': self._annotations,
'tracks': tracks, 'images': self._images,
'categories': self._categories,
'track_id_mapping': self._track_id_mapping,
'split': self._split}
def _make_categories(self):
tao_categories_path = os.path.join(os.path.dirname(__file__), 'tao_categories.json')
with open(tao_categories_path) as f:
return json.load(f)
class PredictionBURSTFormatToTAOFormatConverter:
def __init__(self, gt, ali_format, exemplar_guided):
self._gt = gt
self._ali_format = ali_format
if 'split' in ali_format:
self._split = ali_format['split']
gt_split = self._gt['split']
assert self._split == gt_split, (self._split, gt_split)
else:
self._split = self._gt['split']
self._exemplar_guided = exemplar_guided
self._result = []
self._next_det_id = 0
self._img_by_filename = {}
for img in self._gt['images']:
file_name = img['file_name']
assert file_name not in self._img_by_filename
self._img_by_filename[file_name] = img
self._gt_track_by_track_id = {}
for track in self._gt['tracks']:
self._gt_track_by_track_id[int(track['id'])] = track
self._filtered_out_track_ids = set()
for seq in ali_format['sequences']:
self._visit_seq(seq)
if exemplar_guided and len(self._filtered_out_track_ids) > 0:
self.print_filter_out_debug_info(ali_format)
def print_filter_out_debug_info(self, ali_format):
track_ids_in_pred = set()
a_dict_for_debugging = {}
for seq in ali_format['sequences']:
for local_track_id in seq['track_category_ids']:
global_track_id = _global_track_id(
local_track_id=local_track_id, video_id=seq['id'],
track_id_mapping=self._gt['track_id_mapping'])
track_ids_in_pred.add(global_track_id)
a_dict_for_debugging[global_track_id] = {'seq': seq,
'local_track_id': local_track_id}
print('Number of Track ids in pred:', len(track_ids_in_pred))
print('Exemplar Guided: Filtered out',
len(self._filtered_out_track_ids),
'tracks which were not found in the ground truth.')
track_ids_after_filtering = set(d['track_id'] for d in self._result)
print('Number of tracks after filtering:',
len(track_ids_after_filtering))
problem_tracks = list(
track_ids_in_pred - track_ids_after_filtering - self._filtered_out_track_ids)
if len(problem_tracks) > 0:
print("\nWARNING:", len(problem_tracks),
"object tracks are not present. There could be a number of reasons for this:\n"
"(1) If you are running evaluation for the box/point exemplar-guided task then this is to be expected"
" because your tracker probably didn't predict masks for every ground-truth object instance.\n"
"(2) If you are running evaluation for the mask exemplar-guided task, then this could indicate a "
"problem. Assume that you copied the given first-frame object mask to your predicted result, this "
"should not happen. It could be that your predictions are at the wrong frame-rate i.e. you have no "
"predicted masks for video frames which will be evaluated.\n")
rows = []
for xx in problem_tracks:
rows.append([a_dict_for_debugging[xx]['seq']['dataset'],
a_dict_for_debugging[xx]['seq']['seq_name'],
a_dict_for_debugging[xx]['local_track_id']])
print("For your reference, the sequence name and track IDs for these missing tracks are:")
print(tabulate(rows, ["Dataset", "Sequence Name", "Track ID"]))
def _visit_seq(self, seq):
dataset = seq['dataset']
seq_name = seq['seq_name']
assert len(seq['segmentations']) == len(seq['annotated_image_paths'])
for frame_segs, img_path in zip(seq['segmentations'],
seq['annotated_image_paths']):
for local_track_id_str, track_det in frame_segs.items():
rle = track_det['rle']
file_name = self._split + '/' + dataset + '/' + seq_name + '/' + img_path
# the result might have a higher frame rate than the ground truth
if file_name not in self._img_by_filename:
continue
img = self._img_by_filename[file_name]
img_id = img['id']
height = img['height']
width = img['width']
segmentation = {'counts': rle, 'size': [height, width]}
local_track_id = int(local_track_id_str)
if self._exemplar_guided:
global_track_id = _global_track_id(
local_track_id=local_track_id, video_id=seq['id'],
track_id_mapping=self._gt['track_id_mapping'])
else:
global_track_id = local_track_id
coco_bbox = cocomask.toBbox(segmentation)
bbox = [int(x) for x in coco_bbox]
det = {'id': self._next_det_id, 'image_id': img_id,
'track_id': global_track_id, 'bbox': bbox,
'segmentation': segmentation}
if self._exemplar_guided:
if global_track_id not in self._gt_track_by_track_id:
self._filtered_out_track_ids.add(global_track_id)
continue
gt_track = self._gt_track_by_track_id[global_track_id]
category_id = gt_track['category_id']
det['category_id'] = category_id
elif 'category_id' in track_det:
det['category_id'] = track_det['category_id']
else:
category_id = seq['track_category_ids'][local_track_id_str]
det['category_id'] = category_id
self._next_det_id += 1
if 'score' in track_det:
det['score'] = track_det['score']
else:
det['score'] = 1.0
self._result.append(det)
def convert(self):
return self._result
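# --- Illustrative usage sketch (not part of the original file). It assumes the
# methods above belong to PredictionBURSTFormatToTAOFormatConverter and that
# GroundTruthBURSTFormatToTAOFormatConverter is defined in this module (both are
# imported from here by burst_ow.py). The json paths are hypothetical; the call
# pattern mirrors the one used in burst_ow.py: convert the BURST ground truth to
# TAO format first, then feed it to the prediction converter.
if __name__ == '__main__':
    import json

    with open('burst_gt_val.json') as f:             # hypothetical path
        burst_gt = json.load(f)
    with open('my_tracker_predictions.json') as f:   # hypothetical path
        burst_pred = json.load(f)

    tao_gt = GroundTruthBURSTFormatToTAOFormatConverter(burst_gt).convert()
    converter = PredictionBURSTFormatToTAOFormatConverter(
        tao_gt, burst_pred, exemplar_guided=False)
    tao_dets = converter.convert()                    # list of TAO-style detection dicts
    print('Converted %d detections' % len(tao_dets))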

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,91 @@
import json
import os
from .burst_helpers.burst_ow_base import BURST_OW_Base
from .burst_helpers.format_converter import GroundTruthBURSTFormatToTAOFormatConverter, PredictionBURSTFormatToTAOFormatConverter
from .. import utils
class BURST_OW(BURST_OW_Base):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
tao_config = BURST_OW_Base.get_default_dataset_config()
code_path = utils.get_code_path()
tao_config['GT_FOLDER'] = os.path.join(
code_path, 'data/gt/burst/all_classes/val/') # Location of GT data
tao_config['TRACKERS_FOLDER'] = os.path.join(
code_path, 'data/trackers/burst/open-world/val/') # Trackers location
return tao_config
def _iou_type(self):
return 'mask'
def _box_or_mask_from_det(self, det):
if "segmentation" in det:
return det["segmentation"]
else:
return det["mask"]
def _calculate_area_for_ann(self, ann):
import pycocotools.mask as cocomask
seg = self._box_or_mask_from_det(ann)
return cocomask.area(seg)
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _postproc_ground_truth_data(self, data):
return GroundTruthBURSTFormatToTAOFormatConverter(data).convert()
def _postproc_prediction_data(self, data):
# if it's a list, it's already in TAO format and not in Ali format
# however the image ids do not match and need to be remapped
if isinstance(data, list):
_remap_image_ids(data, self.gt_data)
return data
return PredictionBURSTFormatToTAOFormatConverter(
self.gt_data, data,
exemplar_guided=False).convert()
def _remap_image_ids(pred_data, ali_gt_data):
code_path = utils.get_code_path()
if 'split' in ali_gt_data:
split = ali_gt_data['split']
else:
split = 'val'
if split in ('val', 'validation'):
tao_gt_path = os.path.join(
code_path, 'data/gt/tao/tao_validation/gt.json')
else:
tao_gt_path = os.path.join(
code_path, 'data/gt/tao/tao_test/test_without_annotations.json')
with open(tao_gt_path) as f:
tao_gt = json.load(f)
tao_img_by_id = {}
for img in tao_gt['images']:
img_id = img['id']
tao_img_by_id[img_id] = img
ali_img_id_by_filename = {}
for ali_img in ali_gt_data['images']:
ali_img_id = ali_img['id']
file_name = ali_img['file_name'].replace("validation", "val")
ali_img_id_by_filename[file_name] = ali_img_id
ali_img_id_by_tao_img_id = {}
for tao_img_id, tao_img in tao_img_by_id.items():
file_name = tao_img['file_name']
ali_img_id = ali_img_id_by_filename[file_name]
ali_img_id_by_tao_img_id[tao_img_id] = ali_img_id
for det in pred_data:
tao_img_id = det['image_id']
ali_img_id = ali_img_id_by_tao_img_id[tao_img_id]
det['image_id'] = ali_img_id
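# --- Illustrative usage sketch (not part of the original file). Shows how the
# defaults returned by get_default_dataset_config() above could be overridden
# before constructing the dataset. The folder paths are hypothetical, and the
# constructor call assumes BURST_OW_Base follows the usual TrackEval convention
# of accepting a config dict in __init__ (as the other dataset classes do).
if __name__ == '__main__':
    config = BURST_OW.get_default_dataset_config()
    config['GT_FOLDER'] = '/data/gt/burst/all_classes/val/'             # hypothetical path
    config['TRACKERS_FOLDER'] = '/data/trackers/burst/open-world/val/'  # hypothetical path
    dataset = BURST_OW(config)
    print('Initialised dataset:', dataset.get_name())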

View File

@@ -0,0 +1,276 @@
import os
import csv
import numpy as np
from ._base_dataset import _BaseDataset
from ..utils import TrackEvalException
from .. import utils
from .. import _timing
class DAVIS(_BaseDataset):
"""Dataset class for DAVIS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/davis/davis_unsupervised_val/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/davis/davis_unsupervised_val/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'SPLIT_TO_EVAL': 'val', # Valid: 'val', 'train'
'CLASSES_TO_EVAL': ['general'],
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FILE': None, # Specify seqmap file
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
# '{gt_folder}/Annotations_unsupervised/480p/{seq}'
'MAX_DETECTIONS': 0 # Maximum number of allowed detections per sequence (0 for no threshold)
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
# defining a default class since there are no classes in DAVIS
self.should_classes_combine = False
self.use_super_categories = False
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.config['TRACKERS_FOLDER']
self.max_det = self.config['MAX_DETECTIONS']
# Get classes to eval
self.valid_classes = ['general']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only general class is valid.')
# Get sequences to eval
if self.config["SEQ_INFO"]:
self.seq_list = list(self.config["SEQ_INFO"].keys())
self.seq_lengths = self.config["SEQ_INFO"]
elif self.config["SEQMAP_FILE"]:
self.seq_list = []
seqmap_file = self.config["SEQMAP_FILE"]
if not os.path.isfile(seqmap_file):
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if row[0] == '':
continue
seq = row[0]
self.seq_list.append(seq)
else:
self.seq_list = os.listdir(self.gt_fol)
self.seq_lengths = {seq: len(os.listdir(os.path.join(self.gt_fol, seq))) for seq in self.seq_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
for tracker in self.tracker_list:
for seq in self.seq_list:
curr_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)
if not os.path.isdir(curr_dir):
print('Tracker directory not found: ' + curr_dir)
raise TrackEvalException('Tracker directory not found: ' +
os.path.join(tracker, self.tracker_sub_fol, seq))
tr_timesteps = len(os.listdir(curr_dir))
if self.seq_lengths[seq] != tr_timesteps:
raise TrackEvalException('GT folder and tracker folder have a different number of '
'timesteps for tracker %s and sequence %s' % (tracker, seq))
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the DAVIS format
If is_gt, this returns a dict which contains the fields:
[gt_ids] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[masks_void]: list of masks with void pixels (pixels to be ignored during evaluation)
if not is_gt, this returns a dict which contains the fields:
[tracker_ids] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
from PIL import Image
# File location
if is_gt:
seq_dir = os.path.join(self.gt_fol, seq)
else:
seq_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq)
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'dets', 'masks_void']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# read frames
frames = [os.path.join(seq_dir, im_name) for im_name in sorted(os.listdir(seq_dir))]
id_list = []
for t in range(num_timesteps):
frame = np.array(Image.open(frames[t]))
if is_gt:
void = frame == 255
frame[void] = 0
raw_data['masks_void'][t] = mask_utils.encode(np.asfortranarray(void.astype(np.uint8)))
id_values = np.unique(frame)
id_values = id_values[id_values != 0]
id_list += list(id_values)
tmp = np.ones((len(id_values), *frame.shape))
tmp = tmp * id_values[:, None, None]
masks = np.array(tmp == frame[None, ...]).astype(np.uint8)
raw_data['dets'][t] = mask_utils.encode(np.array(np.transpose(masks, (1, 2, 0)), order='F'))
raw_data['ids'][t] = id_values.astype(int)
num_objects = len(np.unique(id_list))
if not is_gt and num_objects > self.max_det > 0:
raise Exception('Number of proposals (%i) for sequence %s exceeds number of maximum allowed proposals (%i).'
% (num_objects, seq, self.max_det))
if is_gt:
key_map = {'ids': 'gt_ids',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data["num_timesteps"] = num_timesteps
raw_data['mask_shape'] = np.array(Image.open(frames[0])).shape
if is_gt:
raw_data['num_gt_ids'] = num_objects
else:
raw_data['num_tracker_ids'] = num_objects
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
DAVIS:
In DAVIS, the 4 preproc steps are as follows:
1) There are no classes, all detections are evaluated jointly
2) No matched tracker detections are removed.
3) No unmatched tracker detections are removed.
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
Preprocessing special to DAVIS: Pixels which are marked as void in the ground truth are set to zero in the
tracker detections since they are not considered during evaluation.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
num_gt_dets = 0
num_tracker_dets = 0
unique_gt_ids = []
unique_tracker_ids = []
num_timesteps = raw_data['num_timesteps']
# count detections
for t in range(num_timesteps):
num_gt_dets += len(raw_data['gt_dets'][t])
num_tracker_dets += len(raw_data['tracker_dets'][t])
unique_gt_ids += list(np.unique(raw_data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(raw_data['tracker_ids'][t]))
data['gt_ids'] = raw_data['gt_ids']
data['gt_dets'] = raw_data['gt_dets']
data['similarity_scores'] = raw_data['similarity_scores']
data['tracker_ids'] = raw_data['tracker_ids']
# set void pixels in tracker detections to zero
for t in range(num_timesteps):
void_mask = raw_data['masks_void'][t]
if mask_utils.area(void_mask) > 0:
void_mask_ious = np.atleast_1d(mask_utils.iou(raw_data['tracker_dets'][t], [void_mask], [False]))
if void_mask_ious.any():
rows, columns = np.where(void_mask_ious > 0)
for r in rows:
det = mask_utils.decode(raw_data['tracker_dets'][t][r])
void = mask_utils.decode(void_mask).astype(np.bool)
det[void] = 0
det = mask_utils.encode(np.array(det, order='F').astype(np.uint8))
raw_data['tracker_dets'][t][r] = det
data['tracker_dets'] = raw_data['tracker_dets']
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = raw_data['num_tracker_ids']
data['num_gt_ids'] = raw_data['num_gt_ids']
data['mask_shape'] = raw_data['mask_shape']
data['num_timesteps'] = num_timesteps
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
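# --- Illustrative sketch (not part of the original file). It mirrors the
# per-frame mask handling in _load_raw_file above: a DAVIS annotation PNG stores
# one object id per pixel (0 = background, 255 = void), and each id is turned
# into a separate COCO RLE-encoded binary mask. The PNG path is hypothetical.
if __name__ == '__main__':
    import numpy as np
    from PIL import Image
    from pycocotools import mask as mask_utils

    frame = np.array(Image.open('00000.png'))   # hypothetical annotation frame
    void = frame == 255                          # void pixels are ignored during evaluation
    frame[void] = 0
    id_values = np.unique(frame)
    id_values = id_values[id_values != 0]

    # One binary mask per object id, stacked to HxWxN and encoded in Fortran order.
    masks = (id_values[:, None, None] == frame[None, ...]).astype(np.uint8)
    rles = mask_utils.encode(np.array(np.transpose(masks, (1, 2, 0)), order='F'))
    print('frame contains %d objects' % len(rles))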

View File

@@ -0,0 +1,459 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class HeadTrackingChallenge(_BaseDataset):
"""Dataset class for Head Tracking Challenge - 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'HT', # Valid: 'HT'. Refers to the Head Tracking benchmark (the CroHD dataset)
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'static': 2, 'ignore': 3, 'person_on_vehicle': 4}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If sequence length is 'None' tries to read sequence length from .ini files.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
if self.benchmark == 'HT':
data_keys += ['visibility']
data_keys += ['gt_conf']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=np.float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['gt_conf'][t] = np.atleast_1d(time_data[:, 6]).astype(float)
raw_data['visibility'][t] = np.atleast_1d(time_data[:, 8]).astype(float)
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except pedestrian are removed, also removes pedestrian gt dets marked with zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
# distractor class ids (see class_name_to_class_id above): 'static': 2, 'ignore': 3, 'person_on_vehicle': 4
distractor_class_names = ['static', 'ignore', 'person_on_vehicle']
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences',
'similarity_scores', 'gt_visibility']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_visibility = raw_data['visibility'][t]
gt_conf = raw_data['gt_conf'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], np.int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.4 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.logical_not(np.isin(gt_classes[match_rows], cls_id))
if self.benchmark == 'HT':
is_invisible_class = gt_visibility[match_rows] < np.finfo('float').eps
low_conf_class = gt_conf[match_rows] < np.finfo('float').eps
are_distractors = np.logical_or(np.logical_or(is_invisible_class, is_distractor_class), low_conf_class)  # combine all three removal conditions
to_remove_tracker = match_cols[are_distractors]
else:
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
if self.do_preproc and self.benchmark == 'HT':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id)) & \
(gt_visibility > 0.) & \
(gt_conf > 0.)
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
data['gt_visibility'][t] = gt_visibility # No mask!
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
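# --- Illustrative sketch (not part of the original file). It distils the
# matching step used in get_preprocessed_seq_data above: gt and tracker boxes
# are matched with the Hungarian algorithm on IoU, scores below the 0.4
# threshold are zeroed first, and tracker detections matched to distractor gt
# are marked for removal. The input values are toy numbers, not real data.
if __name__ == '__main__':
    import numpy as np
    from scipy.optimize import linear_sum_assignment

    similarity_scores = np.array([[0.9, 0.1],
                                  [0.2, 0.5]])   # rows: gt dets, cols: tracker dets
    gt_is_distractor = np.array([False, True])   # e.g. second gt is of class 'static'

    matching_scores = similarity_scores.copy()
    matching_scores[matching_scores < 0.4 - np.finfo('float').eps] = 0
    match_rows, match_cols = linear_sum_assignment(-matching_scores)
    actually_matched = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
    match_rows = match_rows[actually_matched]
    match_cols = match_cols[actually_matched]

    to_remove_tracker = match_cols[gt_is_distractor[match_rows]]
    print('tracker detections removed as distractor matches:', to_remove_tracker)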

View File

@@ -0,0 +1,389 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from ..utils import TrackEvalException
from .. import _timing
class Kitti2DBox(_BaseDataset):
"""Dataset class for KITTI 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_2d_box_train'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_2d_box_train/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['car', 'pedestrian'], # Valid: ['car', 'pedestrian']
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val', 'training_minus_val', 'test'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.max_occlusion = 2
self.max_truncation = 0
self.min_height = 25
# Get classes to eval
self.valid_classes = ['car', 'pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes [car, pedestrian] are valid.')
self.class_name_to_class_id = {'car': 1, 'van': 2, 'truck': 3, 'pedestrian': 4, 'person': 5, # person sitting
'cyclist': 6, 'tram': 7, 'misc': 8, 'dontcare': 9, 'car_2': 1}
# Get sequences to eval and check gt files exist
self.seq_list = []
self.seq_lengths = {}
seqmap_name = 'evaluate_tracking.seqmap.' + self.config['SPLIT_TO_EVAL']
seqmap_file = os.path.join(self.gt_fol, seqmap_name)
if not os.path.isfile(seqmap_file):
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.read(1024))
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
if len(row) >= 4:
seq = row[0]
self.seq_list.append(seq)
self.seq_lengths[seq] = int(row[3])
if not self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the kitti 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = os.path.join(self.gt_fol, 'label_02', seq + '.txt')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['dontcare']}
else:
crowd_ignore_filter = None
# Valid classes
valid_filter = {2: [x for x in self.class_list]}
if is_gt:
if 'car' in self.class_list:
valid_filter[2].append('van')
if 'pedestrian' in self.class_list:
valid_filter[2] += ['person']
# Convert kitti class strings to class ids
convert_filter = {2: self.class_name_to_class_id}
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, time_col=0, id_col=1, remove_negative_ids=True,
valid_filter=valid_filter,
crowd_ignore_filter=crowd_ignore_filter,
convert_filter=convert_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t)
if time_key in read_data.keys():
time_data = np.asarray(read_data[time_key], dtype=np.float)
raw_data['dets'][t] = np.atleast_2d(time_data[:, 6:10])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
raw_data['classes'][t] = np.atleast_1d(time_data[:, 2]).astype(int)
if is_gt:
gt_extras_dict = {'truncation': np.atleast_1d(time_data[:, 3].astype(int)),
'occlusion': np.atleast_1d(time_data[:, 4].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
if time_data.shape[1] > 17:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 17])
else:
raw_data['tracker_confidences'][t] = np.ones(time_data.shape[0])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'truncation': np.empty(0),
'occlusion': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
if time_key in ignore_data.keys():
time_ignore = np.asarray(ignore_data[time_key], dtype=np.float)
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 6:10])
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
KITTI:
In KITTI, the 4 preproc steps are as follows:
1) There are two classes (pedestrian and car) which are evaluated separately.
2) For the pedestrian class, the 'person' class (person sitting) is treated as a distractor.
For the car class, the 'van' class is treated as a distractor.
GT boxes marked as having occlusion level > 2 or truncation level > 0 are also treated as
distractors.
3) Crowd ignore regions are used to remove unmatched detections. Also unmatched detections with
height <= 25 pixels are removed.
4) Distractor gt dets (including truncated and occluded) are removed.
"""
if cls == 'pedestrian':
distractor_classes = [self.class_name_to_class_id['person']]
elif cls == 'car':
distractor_classes = [self.class_name_to_class_id['van']]
else:
raise (TrackEvalException('Class %s is not evaluatable' % cls))
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls + distractor classes)
gt_class_mask = np.sum([raw_data['gt_classes'][t] == c for c in [cls_id] + distractor_classes], axis=0)
gt_class_mask = gt_class_mask.astype(np.bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
gt_classes = raw_data['gt_classes'][t][gt_class_mask]
gt_occlusion = raw_data['gt_extras'][t]['occlusion'][gt_class_mask]
gt_truncation = raw_data['gt_extras'][t]['truncation'][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(np.bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as truncated, occluded, or belonging to a distractor class.
to_remove_matched = np.array([], np.int)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
is_occluded_or_truncated = np.logical_or(
gt_occlusion[match_rows] > self.max_occlusion + np.finfo('float').eps,
gt_truncation[match_rows] > self.max_truncation + np.finfo('float').eps)
to_remove_matched = np.logical_or(is_distractor_class, is_occluded_or_truncated)
to_remove_matched = match_cols[to_remove_matched]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, also remove those smaller than a minimum height.
unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
unmatched_heights = unmatched_tracker_dets[:, 3] - unmatched_tracker_dets[:, 1]
is_too_small = unmatched_heights <= self.min_height + np.finfo('float').eps
# For unmatched tracker dets, also remove those that are greater than 50% within a crowd ignore region.
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
intersection_with_ignore_region = self._calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions,
box_format='x0y0x1y1', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove all unwanted tracker dets.
to_remove_unmatched = unmatched_indices[np.logical_or(is_too_small, is_within_crowd_ignore_region)]
to_remove_tracker = np.concatenate((to_remove_matched, to_remove_unmatched), axis=0)
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Also remove gt dets that were only useful for preprocessing and are not needed for evaluation.
# These are those that are occluded, truncated and from distractor objects.
gt_to_keep_mask = (np.less_equal(gt_occlusion, self.max_occlusion)) & \
(np.less_equal(gt_truncation, self.max_truncation)) & \
(np.equal(gt_classes, cls_id))
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(np.int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(np.int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='x0y0x1y1')
return similarity_scores
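# --- Illustrative sketch (not part of the original file). It mirrors the seqmap
# parsing in __init__ above: each row of 'evaluate_tracking.seqmap.<split>'
# lists a sequence name in its first column and the number of frames in its
# fourth column. The seqmap path below is hypothetical.
if __name__ == '__main__':
    import csv

    seq_lengths = {}
    with open('data/gt/kitti/kitti_2d_box_train/evaluate_tracking.seqmap.training') as fp:  # hypothetical path
        dialect = csv.Sniffer().sniff(fp.read(1024))
        fp.seek(0)
        for row in csv.reader(fp, dialect):
            if len(row) >= 4:
                seq_lengths[row[0]] = int(row[3])
    print('loaded %d sequences' % len(seq_lengths))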

View File

@@ -0,0 +1,426 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class KittiMOTS(_BaseDataset):
"""Dataset class for KITTI MOTS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/kitti/kitti_mots_val'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/kitti/kitti_mots_val'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['car', 'pedestrian'], # Valid: ['car', 'pedestrian']
'SPLIT_TO_EVAL': 'val', # Valid: 'training', 'val'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/split_to_eval.seqmap)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/label_02/{seq}.txt', # format of gt localization
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.split_to_eval = self.config['SPLIT_TO_EVAL']
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['car', 'pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. '
'Only classes [car, pedestrian] are valid.')
self.class_name_to_class_id = {'car': '1', 'pedestrian': '2', 'ignore': '10'}
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
seqmap_name = 'evaluate_mots.seqmap.' + self.config['SPLIT_TO_EVAL']
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], seqmap_name)
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], seqmap_name)
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.read(1024))
fp.seek(0)
reader = csv.reader(fp, dialect)
for row in reader:
if len(row) >= 4:
seq = "%04d" % int(row[0])
seq_list.append(seq)
seq_lengths[seq] = int(row[3]) + 1
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the KITTI MOTS format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[gt_ignore_region]: list (for each timestep) of masks for the ignore regions
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['10']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_ignore_region']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t)
# list to collect all masks of a timestep to check for overlapping areas
all_masks = []
if time_key in read_data.keys():
try:
raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in read_data[time_key]]
raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
all_masks += raw_data['dets'][t]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['dets'][t] = []
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if time_key in ignore_data.keys():
try:
time_ignore = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in ignore_data[time_key]]
raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
intersect=False)
all_masks += [raw_data['gt_ignore_region'][t]]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)
# check for overlapping masks
if all_masks:
masks_merged = all_masks[0]
for mask in all_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
raise TrackEvalException(
'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
t))
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data["num_timesteps"] = num_timesteps
raw_data['seq'] = seq
return raw_data
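# Illustration (not part of the original file): _load_simple_text_file is called with
# force_delimiters=' ', so the parser above expects space-separated rows of the form
#
#     <timestep> <track_id> <class_id> <img_height> <img_width> <rle_counts>
#
# e.g. a hypothetical line "52 1005 1 375 1242 WSV:2d;1z..." becomes one entry of
# raw_data['dets'][52] with size [375, 1242] and the RLE string as 'counts', while gt rows
# whose class column is '10' are routed to the crowd-ignore regions by crowd_ignore_filter.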
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
KITTI MOTS:
In KITTI MOTS, the 4 preproc steps are as follows:
1) There are two classes (car and pedestrian) which are evaluated separately.
2) There are no ground truth detections marked as to be removed/distractor classes.
Therefore also no matched tracker detections are removed.
3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = int(self.class_name_to_class_id[cls])
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
ignore_region = raw_data['gt_ignore_region'][t]
intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
data['cls'] = cls
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
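# A minimal sketch (not part of the original file) of the ignore-region rule used above,
# assuming pycocotools is available; the mask objects are hypothetical placeholders:
#
#     from pycocotools import mask as mask_utils
#     ioa = mask_utils.area(mask_utils.merge([det, ignore], intersect=True)) / mask_utils.area(det)
#     drop = unmatched and ioa > 0.5
#
# i.e. only tracker masks left unmatched by the Hungarian assignment and with more than half
# of their own area inside the merged ignore region are removed; matched tracker masks and
# all gt masks are kept.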
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
@staticmethod
def _raise_index_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (tracker, seq))
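# Usage sketch (not part of the original file): these dataset classes are meant to be driven
# by TrackEval's Evaluator. Assuming the bundled package is importable as trackeval and the
# class above is exposed as trackeval.datasets.KittiMOTS (the exact export name is an
# assumption), an evaluation run looks roughly like:
#
#     import trackeval
#     dataset = trackeval.datasets.KittiMOTS()
#     metrics = [trackeval.metrics.HOTA(), trackeval.metrics.CLEAR()]
#     trackeval.Evaluator().evaluate([dataset], metrics)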

View File

@@ -0,0 +1,437 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class MotChallenge2DBox(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'MOT17', # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
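# Example (not part of the original file): callers only need to override the keys they care
# about; everything else is filled in from the defaults above via utils.init_config in
# __init__. The paths below are hypothetical placeholders:
#
#     dataset = MotChallenge2DBox({
#         'GT_FOLDER': '/data/MOT17/train',
#         'TRACKERS_FOLDER': './tracker/results',
#         'BENCHMARK': 'MOT17',
#         'SPLIT_TO_EVAL': 'train',
#         'SKIP_SPLIT_FOL': True,
#     })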
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
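# Illustration (not part of the original file): with SEQ_INFO unset, the method above reads a
# seqmap text file whose first row is skipped as a header and whose remaining non-empty rows
# each name one sequence; each sequence's length then comes from the seqLength key in the
# [Sequence] section of its seqinfo.ini. For a hypothetical seqmap listing only MOT17-02-SDP,
# whose seqinfo.ini holds seqLength=600, the result is seq_list == ['MOT17-02-SDP'] and
# seq_lengths == {'MOT17-02-SDP': 600}.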
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format; there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
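# Illustration (not part of the original file): the loader above reads the usual MOTChallenge
# text layout, where columns 2:6 hold the xywh box, column 6 holds the confidence (tracker
# files) or the zero-marked flag (gt files) and column 7 the class id. A gt line in the usual
# comma-separated format, e.g.
#
#     1,3,794,247,71,174,1,1,0.86
#
# becomes, at timestep 0, id 3, box [794, 247, 71, 174], zero_marked 1 and class 1 (pedestrian);
# the values themselves are hypothetical.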
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except the pedestrian class are removed; pedestrian gt dets marked as zero_marked are also removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], dtype=int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
# class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
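# Worked example (not part of the original file): similarity here is plain IoU over 'xywh'
# boxes, so two identical boxes score 1.0, while [0, 0, 10, 10] against [5, 0, 10, 10] overlap
# on a 5x10 strip: IoU = 50 / (100 + 100 - 50) ~= 0.33. Scores below 0.5 are later discarded
# by the matching step in get_preprocessed_seq_data above.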

View File

@@ -0,0 +1,446 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class MOTSChallenge(_BaseDataset):
"""Dataset class for MOTS Challenge tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/MOTS-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/MOTS-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/MOTS-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'MOTS-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = 'MOTS'
self.gt_set = self.benchmark + '-' + self.config['SPLIT_TO_EVAL']
if not self.config['SKIP_SPLIT_FOL']:
split_fol = self.gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': '2', 'ignore': '10'}
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOTS Challenge format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[gt_ignore_region]: list (for each timestep) of masks for the ignore regions
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {2: ['10']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, crowd_ignore_filter=crowd_ignore_filter,
is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_ignore_region']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
# list to collect all masks of a timestep to check for overlapping areas
all_masks = []
if time_key in read_data.keys():
try:
raw_data['dets'][t] = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in read_data[time_key]]
raw_data['ids'][t] = np.atleast_1d([region[1] for region in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([region[2] for region in read_data[time_key]]).astype(int)
all_masks += raw_data['dets'][t]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['dets'][t] = []
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
if time_key in ignore_data.keys():
try:
time_ignore = [{'size': [int(region[3]), int(region[4])],
'counts': region[5].encode(encoding='UTF-8')}
for region in ignore_data[time_key]]
raw_data['gt_ignore_region'][t] = mask_utils.merge([mask for mask in time_ignore],
intersect=False)
all_masks += [raw_data['gt_ignore_region'][t]]
except IndexError:
self._raise_index_error(is_gt, tracker, seq)
except ValueError:
self._raise_value_error(is_gt, tracker, seq)
else:
raw_data['gt_ignore_region'][t] = mask_utils.merge([], intersect=False)
# check for overlapping masks
if all_masks:
masks_merged = all_masks[0]
for mask in all_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
raise TrackEvalException(
'Tracker has overlapping masks. Tracker: ' + tracker + ' Seq: ' + seq + ' Timestep: ' + str(
t))
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detection masks.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOTS Challenge:
In MOTS Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrians) to be evaluated.
2) There are no ground truth detections marked as to be removed/distractor classes.
Therefore also no matched tracker detections are removed.
3) Ignore regions are used to remove unmatched detections (at least 50% overlap with ignore region).
4) There are no ground truth detections (e.g. those of distractor classes) to be removed.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = int(self.class_name_to_class_id[cls])
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = -10000
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets, remove those that are greater than 50% within a crowd ignore region.
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if i in unmatched_indices]
ignore_region = raw_data['gt_ignore_region'][t]
intersection_with_ignore_region = self._calculate_mask_ious(unmatched_tracker_dets, [ignore_region],
is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
# Apply preprocessing to remove unwanted tracker dets.
to_remove_tracker = unmatched_indices[is_within_ignore_region]
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
@staticmethod
def _raise_index_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, tracker, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param tracker: the name of the tracker
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from tracker %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (tracker, seq))

View File

@@ -0,0 +1,452 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class PersonPath22(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/person_path_22/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/person_path_22/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian'], # Valid: ['pedestrian']
'BENCHMARK': 'person_path_22', # Valid: 'person_path_22'
'SPLIT_TO_EVAL': 'test', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only pedestrian class is valid.')
self.class_name_to_class_id = {'pedestrian': 1, 'person_on_vehicle': 2, 'car': 3, 'bicycle': 4, 'motorbike': 5,
'non_mot_vehicle': 6, 'static_person': 7, 'distractor': 8, 'occluder': 9,
'occluder_on_ground': 10, 'occluder_full': 11, 'reflection': 12, 'crowd': 13}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If the sequence length is None, try to read it from the sequence's seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Ignore regions
if is_gt:
crowd_ignore_filter = {7: ['13']}
else:
crowd_ignore_filter = None
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file, crowd_ignore_filter=crowd_ignore_filter)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) + ', ' for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough rows in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
if time_key in ignore_data.keys():
time_ignore = np.asarray(ignore_data[time_key], dtype=float)
raw_data['gt_crowd_ignore_regions'][t] = np.atleast_2d(time_ignore[:, 2:6])
else:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
MOT Challenge:
In MOT Challenge, the 4 preproc steps are as follows:
1) There is only one class (pedestrian) to be evaluated, but all other classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class), those matching with distractor
objects are removed.
3) There are no crowd ignore regions.
4) All gt dets except pedestrians are removed, as are pedestrian gt dets marked as zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
crowd_ignore_regions = raw_data['gt_crowd_ignore_regions'][t]
# Evaluation is ONLY valid for pedestrian class
if len(tracker_classes) > 0 and np.max(tracker_classes) > 1:
raise TrackEvalException(
'Evaluation is only valid for pedestrian class. Non pedestrian class (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], int)
if self.do_preproc and self.benchmark != 'MOT15' and (gt_ids.shape[0] > 0 or len(crowd_ignore_regions) > 0) and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# remove bounding boxes that overlap with crowd ignore region.
intersection_with_ignore_region = self._calculate_box_ious(tracker_dets, crowd_ignore_regions, box_format='xywh', do_ioa=True)
is_within_crowd_ignore_region = np.any(intersection_with_ignore_region > 0.95 + np.finfo('float').eps, axis=1)
to_remove_tracker = np.unique(np.concatenate([to_remove_tracker, np.where(is_within_crowd_ignore_region)[0]]))
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in pedestrian
# class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
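# --- Illustrative sketch, not part of the repository code ---
# The seqinfo.ini fallback used in _get_seq_info above reduces to a small configparser lookup.
# Everything below is hypothetical example data; a real run would call parser.read(ini_file)
# on the path built from the GT folder instead of read_string().
import configparser
example_ini = """
[Sequence]
name=MOT17-02-SDP
imDir=img1
frameRate=30
seqLength=600
imWidth=1920
imHeight=1080
"""
parser = configparser.ConfigParser()
parser.read_string(example_ini)  # real usage: parser.read(os.path.join(gt_fol, seq, 'seqinfo.ini'))
seq_length = int(parser['Sequence']['seqLength'])
print(seq_length)  # -> 600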

View File

@@ -0,0 +1,508 @@
import os
import csv
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from ..utils import TrackEvalException
from .. import _timing
from ..datasets.rob_mots_classmap import cls_id_to_name
class RobMOTS(_BaseDataset):
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'SUB_BENCHMARK': None, # REQUIRED. Sub-benchmark to eval. If None, then error.
# ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
'CLASSES_TO_EVAL': None, # List of classes to eval. If None, then it does all COCO classes.
'SPLIT_TO_EVAL': 'train', # valid: ['train', 'val', 'test']
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'OUTPUT_SUB_FOLDER': 'results', # Output files are saved in OUTPUT_FOLDER/DATA_LOC_FORMAT/OUTPUT_SUB_FOLDER
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/DATA_LOC_FORMAT/TRACKER_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/dataset_subfolder/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use SEQMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
'CLSMAP_FOLDER': None, # Where clsmaps are found (if None, GT_FOLDER/dataset_subfolder/clsmaps)
'CLSMAP_FILE': None, # Directly specify clsmap file (if none use CLSMAP_FOLDER/BENCHMARK_SPLIT_TO_EVAL)
}
return default_config
def __init__(self, config=None):
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config())
self.split = self.config['SPLIT_TO_EVAL']
valid_benchmarks = ['mots_challenge', 'kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'waymo', 'tao']
self.box_gt_benchmarks = ['waymo', 'tao']
self.sub_benchmark = self.config['SUB_BENCHMARK']
if not self.sub_benchmark:
raise TrackEvalException('SUB_BENCHMARK config input is required (there is no default value). Only benchmarks ' +
', '.join(valid_benchmarks) + ' are valid.')
if self.sub_benchmark not in valid_benchmarks:
raise TrackEvalException('Attempted to evaluate an invalid benchmark: ' + self.sub_benchmark + '. Only benchmarks ' +
', '.join(valid_benchmarks) + ' are valid.')
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], self.config['SPLIT_TO_EVAL'])
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = os.path.join(self.config['OUTPUT_SUB_FOLDER'], self.sub_benchmark)
# Loops through all sub-benchmarks, and reads in seqmaps to gather info on all sequences to eval.
self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
valid_class_ids = np.atleast_1d(np.genfromtxt(os.path.join(self.gt_fol, self.split, self.sub_benchmark,
'clsmap.txt')))
valid_classes = [cls_id_to_name[int(x)] for x in valid_class_ids] + ['all']
self.valid_class_ids = valid_class_ids
self.class_name_to_class_id = {cls_name: cls_id for cls_id, cls_name in cls_id_to_name.items()}
self.class_name_to_class_id['all'] = -1
if not self.config['CLASSES_TO_EVAL']:
self.class_list = valid_classes
else:
self.class_list = [cls if cls in valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(valid_classes) + ' are valid.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
if not os.path.isfile(curr_file):
raise TrackEvalException('Tracker file not found: ' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq
+ '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + self.sub_benchmark + '/' + os.path.basename(curr_file))
def get_name(self):
return self.get_class_name() + '.' + self.sub_benchmark
def _get_seq_info(self):
self.seq_list = []
self.seq_lengths = {}
self.seq_sizes = {}
self.seq_ignore_class_ids = {}
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'seqmap.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.split + '.seqmap')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
dialect = csv.Sniffer().sniff(fp.readline(), delimiters=' ')
fp.seek(0)
reader = csv.reader(fp, dialect)
for i, row in enumerate(reader):
if len(row) >= 4:
# first col: sequence, second col: sequence length, third and fourth col: sequence height/width
# The rest of the columns list the 'sequence ignore class ids' which are classes not penalized as
# FPs for this sequence.
seq = row[0]
self.seq_list.append(seq)
self.seq_lengths[seq] = int(row[1])
self.seq_sizes[seq] = (int(row[2]), int(row[3]))
self.seq_ignore_class_ids[seq] = [int(row[x]) for x in range(4, len(row))]
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the unified RobMOTS format.
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# import to reduce minimum requirements
from pycocotools import mask as mask_utils
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, 'data.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = os.path.join(self.gt_fol, self.split, self.sub_benchmark, 'data', seq + '.txt')
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, self.sub_benchmark, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file,
force_delimiters=' ')
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
time_key = str(t)
# list to collect all masks of a timestep to check for overlapping areas (for segmentation datasets)
all_valid_masks = []
if time_key in read_data.keys():
try:
raw_data['ids'][t] = np.atleast_1d([det[1] for det in read_data[time_key]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([det[2] for det in read_data[time_key]]).astype(int)
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
raw_data['dets'][t] = [{'size': [int(region[4]), int(region[5])],
'counts': region[6].encode(encoding='UTF-8')}
for region in read_data[time_key]]
all_valid_masks += [mask for mask, cls in zip(raw_data['dets'][t], raw_data['classes'][t]) if
cls < 100]
else:
raw_data['dets'][t] = np.atleast_2d([det[4:8] for det in read_data[time_key]]).astype(float)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([det[3] for det
in read_data[time_key]]).astype(float)
except IndexError:
self._raise_index_error(is_gt, self.sub_benchmark, seq)
except ValueError:
self._raise_value_error(is_gt, self.sub_benchmark, seq)
# no detection in this timestep
else:
if (not is_gt) or (self.sub_benchmark not in self.box_gt_benchmarks):
raw_data['dets'][t] = []
else:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0).astype(float)
# check for overlapping masks
if all_valid_masks:
masks_merged = all_valid_masks[0]
for mask in all_valid_masks[1:]:
if mask_utils.area(mask_utils.merge([masks_merged, mask], intersect=True)) != 0.0:
err = 'Overlapping masks in frame %d' % t
raise TrackEvalException(err)
masks_merged = mask_utils.merge([masks_merged, mask], intersect=False)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['frame_size'] = self.seq_sizes[seq]
raw_data['seq'] = seq
return raw_data
@staticmethod
def _raise_index_error(is_gt, sub_benchmark, seq):
"""
Auxiliary method to raise an evaluation error in case of an index error while reading files.
:param is_gt: whether gt or tracker data is read
:param sub_benchmark: the name of the sub-benchmark
:param seq: the name of the seq
:return: None
"""
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from benchmark %s, sequence %s, because there are not enough ' \
'columns in the data.' % (sub_benchmark, seq)
raise TrackEvalException(err)
@staticmethod
def _raise_value_error(is_gt, sub_benchmark, seq):
"""
Auxiliary method to raise an evaluation error in case of a value error while reading files.
:param is_gt: whether gt or tracker data is read
:param sub_benchmark: the name of the sub-benchmark
:param seq: the name of the seq
:return: None
"""
if is_gt:
raise TrackEvalException(
'GT data for sequence %s cannot be converted to the right format. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Tracking data from benchmark %s, sequence %s cannot be converted to the right format. '
'Is data corrupted?' % (sub_benchmark, seq))
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
Preprocessing (preproc) occurs in 3 steps.
1) Extract only detections relevant for the class to be evaluated.
2) Match gt dets and tracker dets. Tracker dets that are matched to a gt det (TPs) are marked as not to be
removed.
3) Remove unmatched tracker dets if they fall within an ignore region or are too small, or if that class
is marked as an ignore class for that sequence.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
Note that there is a special 'all' class, which evaluates all of the COCO classes together in a
'class agnostic' fashion.
"""
# import to reduce minimum requirements
from pycocotools import mask as mask_utils
# Check that input data has unique ids
self._check_unique_ids(raw_data)
cls_id = self.class_name_to_class_id[cls]
ignore_class_id = cls_id+100
seq = raw_data['seq']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class
if cls == 'all':
gt_class_mask = raw_data['gt_classes'][t] < 100
# For waymo, combine predictions for [car, truck, bus, motorcycle] into car, because they are all annotated
# together as one 'vehicle' class.
elif self.sub_benchmark == 'waymo' and cls == 'car':
waymo_vehicle_classes = np.array([3, 4, 6, 8])
gt_class_mask = np.isin(raw_data['gt_classes'][t], waymo_vehicle_classes)
else:
gt_class_mask = raw_data['gt_classes'][t] == cls_id
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
if cls == 'all':
ignore_regions_mask = raw_data['gt_classes'][t] >= 100
else:
ignore_regions_mask = raw_data['gt_classes'][t] == ignore_class_id
ignore_regions_mask = np.logical_or(ignore_regions_mask, raw_data['gt_classes'][t] == 100)
if self.sub_benchmark in self.box_gt_benchmarks:
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
ignore_regions_box = raw_data['gt_dets'][t][ignore_regions_mask]
if len(ignore_regions_box) > 0:
ignore_regions_box[:, 2] = ignore_regions_box[:, 2] - ignore_regions_box[:, 0]
ignore_regions_box[:, 3] = ignore_regions_box[:, 3] - ignore_regions_box[:, 1]
ignore_regions = mask_utils.frPyObjects(ignore_regions_box, self.seq_sizes[seq][0], self.seq_sizes[seq][1])
else:
ignore_regions = []
else:
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
ignore_regions = [raw_data['gt_dets'][t][ind] for ind in range(len(ignore_regions_mask)) if
ignore_regions_mask[ind]]
if cls == 'all':
tracker_class_mask = np.ones_like(raw_data['tracker_classes'][t])
else:
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
tracker_classes = raw_data['tracker_classes'][t][tracker_class_mask]
# Only do preproc if there are ignore regions defined to remove
if tracker_ids.shape[0] > 0:
# Match tracker and gt dets (with hungarian algorithm)
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
# match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
# For unmatched tracker dets remove those that are greater than 50% within an ignore region.
# unmatched_tracker_dets = tracker_dets[unmatched_indices, :]
# crowd_ignore_regions = raw_data['gt_ignore_regions'][t]
# intersection_with_ignore_region = self. \
# _calculate_box_ious(unmatched_tracker_dets, crowd_ignore_regions, box_format='x0y0x1y1',
# do_ioa=True)
if cls_id in self.seq_ignore_class_ids[seq]:
# Remove unmatched detections for classes that are marked as 'ignore' for the whole sequence.
to_remove_tracker = unmatched_indices
else:
unmatched_tracker_dets = [tracker_dets[i] for i in range(len(tracker_dets)) if
i in unmatched_indices]
# For unmatched tracker dets remove those that are too small.
tracker_boxes_t = mask_utils.toBbox(unmatched_tracker_dets)
unmatched_widths = tracker_boxes_t[:, 2]
unmatched_heights = tracker_boxes_t[:, 3]
unmatched_size = np.maximum(unmatched_heights, unmatched_widths)
min_size = np.min(self.seq_sizes[seq])/8
is_too_small = unmatched_size <= min_size + np.finfo('float').eps
# For unmatched tracker dets remove those that are greater than 50% within an ignore region.
if ignore_regions:
ignore_region_merged = ignore_regions[0]
for mask in ignore_regions[1:]:
ignore_region_merged = mask_utils.merge([ignore_region_merged, mask], intersect=False)
intersection_with_ignore_region = self. \
_calculate_mask_ious(unmatched_tracker_dets, [ignore_region_merged], is_encoded=True, do_ioa=True)
is_within_ignore_region = np.any(intersection_with_ignore_region > 0.5 + np.finfo('float').eps, axis=1)
to_remove_tracker = unmatched_indices[np.logical_or(is_too_small, is_within_ignore_region)]
else:
to_remove_tracker = unmatched_indices[is_too_small]
# For the special 'all' class, you need to remove unmatched detections from all ignore classes and
# non-evaluated classes.
if cls == 'all':
unmatched_tracker_classes = [tracker_classes[i] for i in range(len(tracker_classes)) if
i in unmatched_indices]
is_ignore_class = np.isin(unmatched_tracker_classes, self.seq_ignore_class_ids[seq])
is_not_evaled_class = np.logical_not(np.isin(unmatched_tracker_classes, self.valid_class_ids))
to_remove_all = unmatched_indices[np.logical_or(is_ignore_class, is_not_evaled_class)]
to_remove_tracker = np.concatenate([to_remove_tracker, to_remove_all], axis=0)
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# keep all ground truth detections
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
data['frame_size'] = raw_data['frame_size']
# Ensure that ids are unique per timestep.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
if self.sub_benchmark in self.box_gt_benchmarks:
# Convert tracker masks to bboxes (for benchmarks with only bbox ground-truth),
# and then convert to x0y0x1y1 format.
tracker_boxes_t = mask_utils.toBbox(tracker_dets_t)
tracker_boxes_t[:, 2] = tracker_boxes_t[:, 0] + tracker_boxes_t[:, 2]
tracker_boxes_t[:, 3] = tracker_boxes_t[:, 1] + tracker_boxes_t[:, 3]
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_boxes_t, box_format='x0y0x1y1')
else:
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
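# --- Illustrative sketch, not part of the repository code ---
# Quick reference for the seqmap format consumed by _get_seq_info above: one row per sequence,
# giving name, length, height, width, then any per-sequence ignore class ids. The row values
# below are made-up example data.
row = ['seq_0001', '154', '375', '1242', '3', '8']
seq = row[0]
seq_length = int(row[1])
seq_size = (int(row[2]), int(row[3]))         # (height, width), as stored in self.seq_sizes
ignore_class_ids = [int(x) for x in row[4:]]  # classes not penalised as FPs for this sequence
print(seq, seq_length, seq_size, ignore_class_ids)  # -> seq_0001 154 (375, 1242) [3, 8]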

View File

@@ -0,0 +1,81 @@
cls_id_to_name = {
1: 'person',
2: 'bicycle',
3: 'car',
4: 'motorcycle',
5: 'airplane',
6: 'bus',
7: 'train',
8: 'truck',
9: 'boat',
10: 'traffic light',
11: 'fire hydrant',
12: 'stop sign',
13: 'parking meter',
14: 'bench',
15: 'bird',
16: 'cat',
17: 'dog',
18: 'horse',
19: 'sheep',
20: 'cow',
21: 'elephant',
22: 'bear',
23: 'zebra',
24: 'giraffe',
25: 'backpack',
26: 'umbrella',
27: 'handbag',
28: 'tie',
29: 'suitcase',
30: 'frisbee',
31: 'skis',
32: 'snowboard',
33: 'sports ball',
34: 'kite',
35: 'baseball bat',
36: 'baseball glove',
37: 'skateboard',
38: 'surfboard',
39: 'tennis racket',
40: 'bottle',
41: 'wine glass',
42: 'cup',
43: 'fork',
44: 'knife',
45: 'spoon',
46: 'bowl',
47: 'banana',
48: 'apple',
49: 'sandwich',
50: 'orange',
51: 'broccoli',
52: 'carrot',
53: 'hot dog',
54: 'pizza',
55: 'donut',
56: 'cake',
57: 'chair',
58: 'couch',
59: 'potted plant',
60: 'bed',
61: 'dining table',
62: 'toilet',
63: 'tv',
64: 'laptop',
65: 'mouse',
66: 'remote',
67: 'keyboard',
68: 'cell phone',
69: 'microwave',
70: 'oven',
71: 'toaster',
72: 'sink',
73: 'refrigerator',
74: 'book',
75: 'clock',
76: 'vase',
77: 'scissors',
78: 'teddy bear',
79: 'hair drier',
80: 'toothbrush'}
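# --- Illustrative sketch, not part of the repository code ---
# The RobMOTS dataset class inverts this id-to-name map and adds the special class-agnostic
# 'all' entry (id -1); a minimal sketch of that inversion using the dictionary defined above.
class_name_to_class_id = {cls_name: cls_id for cls_id, cls_name in cls_id_to_name.items()}
class_name_to_class_id['all'] = -1  # special class-agnostic entry
print(class_name_to_class_id['person'], class_name_to_class_id['all'])  # -> 1 -1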

View File

@@ -0,0 +1,113 @@
# python3 scripts\run_rob_mots.py --ROBMOTS_SPLIT val --TRACKERS_TO_EVAL tracker_name (e.g. STP) --USE_PARALLEL True --NUM_PARALLEL_CORES 4
import sys
import os
import csv
import numpy as np
from multiprocessing import freeze_support
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import trackeval # noqa: E402
from trackeval import utils
code_path = utils.get_code_path()
if __name__ == '__main__':
freeze_support()
script_config = {
'ROBMOTS_SPLIT': 'train', # 'train', # valid: 'train', 'val', 'test', 'test_live', 'test_post', 'test_all'
'BENCHMARKS': ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao'], # 'bdd_mots' coming soon
'GT_FOLDER': os.path.join(code_path, 'data/gt/rob_mots'),
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/rob_mots'),
}
default_eval_config = trackeval.Evaluator.get_default_eval_config()
default_eval_config['PRINT_ONLY_COMBINED'] = True
default_eval_config['DISPLAY_LESS_PROGRESS'] = True
default_dataset_config = trackeval.datasets.RobMOTS.get_default_dataset_config()
config = {**default_eval_config, **default_dataset_config, **script_config}
# Command line interface:
config = utils.update_config(config)
if config['ROBMOTS_SPLIT'] == 'val':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
'tao', 'mots_challenge']
config['SPLIT_TO_EVAL'] = 'val'
elif config['ROBMOTS_SPLIT'] == 'test' or config['ROBMOTS_SPLIT'] == 'test_live':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'test_post':
config['BENCHMARKS'] = ['mots_challenge', 'waymo']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'test_all':
config['BENCHMARKS'] = ['kitti_mots', 'bdd_mots', 'davis_unsupervised', 'youtube_vis', 'ovis',
'tao', 'mots_challenge', 'waymo']
config['SPLIT_TO_EVAL'] = 'test'
elif config['ROBMOTS_SPLIT'] == 'train':
config['BENCHMARKS'] = ['kitti_mots', 'davis_unsupervised', 'youtube_vis', 'ovis', 'tao'] # 'bdd_mots' coming soon
config['SPLIT_TO_EVAL'] = 'train'
metrics_config = {'METRICS': ['HOTA']}
# metrics_config = {'METRICS': ['HOTA', 'CLEAR', 'Identity']}
eval_config = {k: v for k, v in config.items() if k in config.keys()}
dataset_config = {k: v for k, v in config.items() if k in config.keys()}
# Run code
dataset_list = []
for bench in config['BENCHMARKS']:
dataset_config['SUB_BENCHMARK'] = bench
dataset_list.append(trackeval.datasets.RobMOTS(dataset_config))
evaluator = trackeval.Evaluator(eval_config)
metrics_list = []
for metric in [trackeval.metrics.HOTA, trackeval.metrics.CLEAR, trackeval.metrics.Identity]:
if metric.get_name() in metrics_config['METRICS']:
metrics_list.append(metric())
if len(metrics_list) == 0:
raise Exception('No metrics selected for evaluation')
output_res, output_msg = evaluator.evaluate(dataset_list, metrics_list)
# For each benchmark, combine the 'all' score with the 'cls_averaged' using geometric mean.
metrics_to_calc = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA']
trackers = list(output_res['RobMOTS.' + config['BENCHMARKS'][0]].keys())
for tracker in trackers:
# final_results[benchmark][result_type][metric]
final_results = {}
res = {bench: output_res['RobMOTS.' + bench][tracker]['COMBINED_SEQ'] for bench in config['BENCHMARKS']}
for bench in config['BENCHMARKS']:
final_results[bench] = {'cls_av': {}, 'det_av': {}, 'final': {}}
for metric in metrics_to_calc:
final_results[bench]['cls_av'][metric] = np.mean(res[bench]['cls_comb_cls_av']['HOTA'][metric])
final_results[bench]['det_av'][metric] = np.mean(res[bench]['all']['HOTA'][metric])
final_results[bench]['final'][metric] = \
np.sqrt(final_results[bench]['cls_av'][metric] * final_results[bench]['det_av'][metric])
# Take the arithmetic mean over all the benchmarks
final_results['overall'] = {'cls_av': {}, 'det_av': {}, 'final': {}}
for metric in metrics_to_calc:
final_results['overall']['cls_av'][metric] = \
np.mean([final_results[bench]['cls_av'][metric] for bench in config['BENCHMARKS']])
final_results['overall']['det_av'][metric] = \
np.mean([final_results[bench]['det_av'][metric] for bench in config['BENCHMARKS']])
final_results['overall']['final'][metric] = \
np.mean([final_results[bench]['final'][metric] for bench in config['BENCHMARKS']])
# Save out result
headers = [config['SPLIT_TO_EVAL']] + [x + '___' + metric for x in ['f', 'c', 'd'] for metric in metrics_to_calc]
def rowify(d):
return [d[x][metric] for x in ['final', 'cls_av', 'det_av'] for metric in metrics_to_calc]
out_file = os.path.join(script_config['TRACKERS_FOLDER'], script_config['ROBMOTS_SPLIT'], tracker,
'final_results.csv')
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f, delimiter=',')
writer.writerow(headers)
writer.writerow(['overall'] + rowify(final_results['overall']))
for bench in config['BENCHMARKS']:
if bench == 'overall':
continue
writer.writerow([bench] + rowify(final_results[bench]))
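# --- Illustrative sketch, not part of the repository script ---
# The score combination above: per benchmark the final value is the geometric mean of the
# class-averaged and class-agnostic ('all') results, and the overall score is the arithmetic
# mean over benchmarks. The HOTA values below are invented example numbers.
import numpy as np
cls_av = {'kitti_mots': 0.52, 'tao': 0.31}   # hypothetical class-averaged HOTA per benchmark
det_av = {'kitti_mots': 0.68, 'tao': 0.45}   # hypothetical 'all'-class HOTA per benchmark
final = {b: np.sqrt(cls_av[b] * det_av[b]) for b in cls_av}   # geometric mean per benchmark
overall = float(np.mean(list(final.values())))                # arithmetic mean over benchmarks
print(final, overall)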

View File

@@ -0,0 +1,566 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class TAO(_BaseDataset):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
self.class_list = [cls for cls in self.valid_classes]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = json.load(f)
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information, these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([ann['category_id'] for ann in annotations]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
+ self.seq_to_classes[seq_id]['neg_cat_ids']
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
if cls in classes_to_consider else [] for cls in all_classes}
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = 'bbox'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = ann["bbox"][2] * ann["bbox"][3]
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann['category_id'],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute average area
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled inplace
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
print('Found track IDs shared across videos; re-assigning them to be unique.')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
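# A minimal sketch of how the 'data' dict produced by get_preprocessed_seq_data()
# above is typically consumed downstream: per timestep, gt and tracker detections
# are paired via the precomputed similarity_scores. This is an illustrative toy
# consumer, not one of the TrackEval metrics.
def count_matches_at_05(data):
    """Count gt/tracker pairs with IoU >= 0.5 over all timesteps of one sequence."""
    from scipy.optimize import linear_sum_assignment
    import numpy as np
    total = 0
    for t in range(data['num_timesteps']):
        sim = data['similarity_scores'][t]
        if sim is None or sim.size == 0:
            continue
        rows, cols = linear_sum_assignment(-sim)          # Hungarian matching on IoU
        total += int(np.sum(sim[rows, cols] >= 0.5))
    return total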

View File

@@ -0,0 +1,652 @@
import os
import numpy as np
import json
import itertools
from collections import defaultdict
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
class TAO_OW(_BaseDataset):
"""Dataset class for TAO tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
'PRINT_CONFIG': True, # Whether to print current config
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
'SUBSET': 'all'
}
return default_config
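# A hedged example of overriding a few of the defaults above; keys left out keep
# their default values via utils.init_config() in __init__ below. The paths and
# the tracker name are placeholders, not locations used anywhere in this repo.
custom_ow_config = {
    'GT_FOLDER': '/path/to/tao_ow/annotations',     # must contain exactly one gt .json file
    'TRACKERS_FOLDER': '/path/to/tracker/results',
    'TRACKERS_TO_EVAL': ['my_tracker'],             # hypothetical tracker folder name
    'MAX_DETECTIONS': 100,                          # 0 would mean unlimited
    'SUBSET': 'known',                              # 'all', 'known', 'unknown' or 'distractor'
}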
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER']
self.tracker_fol = self.config['TRACKERS_FOLDER']
self.should_classes_combine = True
self.use_super_categories = False
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
self.subset = self.config['SUBSET']
if self.subset != 'all':
# Split GT data into `known`, `unknown` or `distractor`
self._split_known_unknown_distractor()
self.gt_data = self._filter_gt_data(self.gt_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(self.gt_data['annotations'] + self.gt_data['tracks'])
# Get sequences to eval and sequence information
self.seq_list = [vid['name'].replace('/', '-') for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['name'].replace('/', '-'): vid['id'] for vid in self.gt_data['videos']}
# compute mappings from videos to annotation data
self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(self.gt_data['annotations'])
# compute sequence lengths
self.seq_lengths = {vid['id']: 0 for vid in self.gt_data['videos']}
for img in self.gt_data['images']:
self.seq_lengths[img['video_id']] += 1
self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
self.seq_to_classes = {vid['id']: {'pos_cat_ids': list({track['category_id'] for track
in self.videos_to_gt_tracks[vid['id']]}),
'neg_cat_ids': vid['neg_category_ids'],
'not_exhaustively_labeled_cat_ids': vid['not_exhaustive_category_ids']}
for vid in self.gt_data['videos']}
# Get classes to eval
considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
seen_cats = set([cat_id for vid_id in considered_vid_ids for cat_id
in self.seq_to_classes[vid_id]['pos_cat_ids']])
# only classes with ground truth are evaluated in TAO
self.valid_classes = [cls['name'] for cls in self.gt_data['categories'] if cls['id'] in seen_cats]
# cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
# self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
# for cls in self.config['CLASSES_TO_EVAL']]
self.class_list = ["object"] # class-agnostic
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) +
' are valid (classes present in ground truth data).')
else:
# self.class_list = [cls for cls in self.valid_classes]
self.class_list = ["object"] # class-agnostic
# self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
self.class_name_to_class_id = {"object": 1} # class-agnostic
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
for tracker in self.tracker_list:
tr_dir_files = [file for file in os.listdir(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol))
if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ ' does not contain exactly one json file.')
with open(os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0])) as f:
curr_data = json.load(f)
# limit detections if MAX_DETECTIONS > 0
if self.config['MAX_DETECTIONS']:
curr_data = self._limit_dets_per_image(curr_data)
# fill missing video ids
self._fill_video_ids_inplace(curr_data)
# make track ids unique over whole evaluation set
self._make_track_ids_unique(curr_data)
# merge categories marked with a merged tag in TAO dataset
self._merge_categories(curr_data)
# get tracker sequence information
curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = self._compute_vid_mappings(curr_data)
self.tracker_data[tracker]['vids_to_tracks'] = curr_videos_to_tracker_tracks
self.tracker_data[tracker]['vids_to_images'] = curr_videos_to_tracker_images
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the TAO format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with image IDs as
keys and corresponding bounding boxes as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with image IDs as
keys and corresponding bounding boxes as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
seq_id = self.seq_name_to_seq_id[seq]
# File location
if is_gt:
imgs = self.videos_to_gt_images[seq_id]
else:
imgs = self.tracker_data[tracker]['vids_to_images'][seq_id]
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
img_to_timestep = self.seq_to_images_to_timestep[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for img in imgs:
# some tracker data contains images without any ground truth information; these are ignored
try:
t = img_to_timestep[img['id']]
except KeyError:
continue
annotations = img['annotations']
raw_data['dets'][t] = np.atleast_2d([ann['bbox'] for ann in annotations]).astype(float)
raw_data['ids'][t] = np.atleast_1d([ann['track_id'] for ann in annotations]).astype(int)
raw_data['classes'][t] = np.atleast_1d([1 for _ in annotations]).astype(int) # class-agnostic
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([ann['score'] for ann in annotations]).astype(float)
for t, d in enumerate(raw_data['dets']):
if d is None:
raw_data['dets'][t] = np.empty((0, 4)).astype(float)
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
# all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
all_classes = [1] # class-agnostic
if is_gt:
classes_to_consider = all_classes
all_tracks = self.videos_to_gt_tracks[seq_id]
else:
# classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
# + self.seq_to_classes[seq_id]['neg_cat_ids']
classes_to_consider = all_classes # class-agnostic
all_tracks = self.tracker_data[tracker]['vids_to_tracks'][seq_id]
# classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
# if cls in classes_to_consider else [] for cls in all_classes}
classes_to_tracks = {cls: [track for track in all_tracks]
if cls in classes_to_consider else [] for cls in all_classes} # class-agnostic
# mapping from classes to track information
raw_data['classes_to_tracks'] = {cls: [{det['image_id']: np.atleast_1d(det['bbox'])
for det in track['annotations']} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_lengths'] = {cls: [len(track['annotations']) for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if not is_gt:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([np.mean([float(x['score'])
for x in track['annotations']])
for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_lengths': 'classes_to_gt_track_lengths',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_lengths': 'classes_to_dt_track_lengths',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['neg_cat_ids'] = self.seq_to_classes[seq_id]['neg_cat_ids']
raw_data['not_exhaustively_labeled_cls'] = self.seq_to_classes[seq_id]['not_exhaustively_labeled_cat_ids']
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
TAO:
In TAO, the 4 preproc steps are as follows:
1) All classes present in the ground truth data are evaluated separately.
2) No matched tracker detections are removed.
3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
detections for classes which are marked as not exhaustively labeled are removed.
4) No gt detections are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
is_not_exhaustively_labeled = cls_id in raw_data['not_exhaustively_labeled_cls']
is_neg_category = cls_id in raw_data['neg_cat_ids']
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for preproc and eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = raw_data['gt_dets'][t][gt_class_mask]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = raw_data['tracker_dets'][t][tracker_class_mask]
tracker_confidences = raw_data['tracker_confidences'][t][tracker_class_mask]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
# Match tracker and gt dets (with hungarian algorithm).
unmatched_indices = np.arange(tracker_ids.shape[0])
if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_cols = match_cols[actually_matched_mask]
unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
if gt_ids.shape[0] == 0 and not is_neg_category:
to_remove_tracker = unmatched_indices
elif is_not_exhaustively_labeled:
to_remove_tracker = unmatched_indices
else:
to_remove_tracker = np.array([], dtype=int)
# remove all unwanted unmatched tracker detections
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_lengths'] = raw_data['classes_to_gt_track_lengths'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_lengths'] = raw_data['classes_to_dt_track_lengths'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['not_exhaustively_labeled'] = is_not_exhaustively_labeled
data['iou_type'] = 'bbox'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_lengths'] = [data['dt_track_lengths'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
return similarity_scores
def _merge_categories(self, annotations):
"""
Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
:param annotations: the annotations in which the classes should be merged
:return: None
"""
merge_map = {}
for category in self.gt_data['categories']:
if 'merged' in category:
for to_merge in category['merged']:
merge_map[to_merge['id']] = category['id']
for ann in annotations:
ann['category_id'] = merge_map.get(ann['category_id'], ann['category_id'])
def _compute_vid_mappings(self, annotations):
"""
Computes mappings from Videos to corresponding tracks and images.
:param annotations: the annotations for which the mapping should be generated
:return: the video-to-track-mapping, the video-to-image-mapping
"""
vids_to_tracks = {}
vids_to_imgs = {}
vid_ids = [vid['id'] for vid in self.gt_data['videos']]
# compute a mapping from image IDs to images
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
for ann in annotations:
ann["area"] = ann["bbox"][2] * ann["bbox"][3]
vid = ann["video_id"]
if ann["video_id"] not in vids_to_tracks.keys():
vids_to_tracks[ann["video_id"]] = list()
if ann["video_id"] not in vids_to_imgs.keys():
vids_to_imgs[ann["video_id"]] = list()
# Fill in vids_to_tracks
tid = ann["track_id"]
exist_tids = [track["id"] for track in vids_to_tracks[vid]]
try:
index1 = exist_tids.index(tid)
except ValueError:
index1 = -1
if tid not in exist_tids:
curr_track = {"id": tid, "category_id": ann['category_id'],
"video_id": vid, "annotations": [ann]}
vids_to_tracks[vid].append(curr_track)
else:
vids_to_tracks[vid][index1]["annotations"].append(ann)
# Fill in vids_to_imgs
img_id = ann['image_id']
exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
try:
index2 = exist_img_ids.index(img_id)
except ValueError:
index2 = -1
if index2 == -1:
curr_img = {"id": img_id, "annotations": [ann]}
vids_to_imgs[vid].append(curr_img)
else:
vids_to_imgs[vid][index2]["annotations"].append(ann)
# sort annotations by frame index and compute track area
for vid, tracks in vids_to_tracks.items():
for track in tracks:
track["annotations"] = sorted(
track['annotations'],
key=lambda x: images[x['image_id']]['frame_index'])
# Compute average area
track["area"] = (sum(x['area'] for x in track['annotations']) / len(track['annotations']))
# Ensure all videos are present
for vid_id in vid_ids:
if vid_id not in vids_to_tracks.keys():
vids_to_tracks[vid_id] = []
if vid_id not in vids_to_imgs.keys():
vids_to_imgs[vid_id] = []
return vids_to_tracks, vids_to_imgs
def _compute_image_to_timestep_mappings(self):
"""
Computes a mapping from images to the corresponding timestep in the sequence.
:return: the image-to-timestep-mapping
"""
images = {}
for image in self.gt_data['images']:
images[image['id']] = image
seq_to_imgs_to_timestep = {vid['id']: dict() for vid in self.gt_data['videos']}
for vid in seq_to_imgs_to_timestep:
curr_imgs = [img['id'] for img in self.videos_to_gt_images[vid]]
curr_imgs = sorted(curr_imgs, key=lambda x: images[x]['frame_index'])
seq_to_imgs_to_timestep[vid] = {curr_imgs[i]: i for i in range(len(curr_imgs))}
return seq_to_imgs_to_timestep
def _limit_dets_per_image(self, annotations):
"""
Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
https://github.com/TAO-Dataset/
:param annotations: the annotations in which the detections should be limited
:return: the annotations with limited detections
"""
max_dets = self.config['MAX_DETECTIONS']
img_ann = defaultdict(list)
for ann in annotations:
img_ann[ann["image_id"]].append(ann)
for img_id, _anns in img_ann.items():
if len(_anns) <= max_dets:
continue
_anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
img_ann[img_id] = _anns[:max_dets]
return [ann for anns in img_ann.values() for ann in anns]
def _fill_video_ids_inplace(self, annotations):
"""
Fills in missing video IDs inplace. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotations for which the video IDs should be filled inplace
:return: None
"""
missing_video_id = [x for x in annotations if 'video_id' not in x]
if missing_video_id:
image_id_to_video_id = {
x['id']: x['video_id'] for x in self.gt_data['images']
}
for x in missing_video_id:
x['video_id'] = image_id_to_video_id[x['image_id']]
@staticmethod
def _make_track_ids_unique(annotations):
"""
Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
:param annotations: the annotation set
:return: the number of updated IDs
"""
track_id_videos = {}
track_ids_to_update = set()
max_track_id = 0
for ann in annotations:
t = ann['track_id']
if t not in track_id_videos:
track_id_videos[t] = ann['video_id']
if ann['video_id'] != track_id_videos[t]:
# Track id is assigned to multiple videos
track_ids_to_update.add(t)
max_track_id = max(max_track_id, t)
if track_ids_to_update:
print('Found track IDs shared across videos; re-assigning them to be unique.')
next_id = itertools.count(max_track_id + 1)
new_track_ids = defaultdict(lambda: next(next_id))
for ann in annotations:
t = ann['track_id']
v = ann['video_id']
if t in track_ids_to_update:
ann['track_id'] = new_track_ids[t, v]
return len(track_ids_to_update)
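# A small, hedged illustration of the helper above: track_id 7 appears in two
# different videos, so it is flagged and every (track_id, video_id) pair using it
# is given a fresh id counted up from max_track_id + 1. Toy data only.
_toy_anns = [
    {'track_id': 7, 'video_id': 1},
    {'track_id': 7, 'video_id': 1},
    {'track_id': 7, 'video_id': 2},   # same id in a different video -> conflict
]
# TAO_OW._make_track_ids_unique(_toy_anns) would return 1 (one conflicting id)
# and rewrite the ids in place, here to 8, 8 and 9 for the three annotations.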
def _split_known_unknown_distractor(self):
all_ids = set([i for i in range(1, 2000)]) # 2000 is larger than the max category id in TAO-OW.
# `knowns` includes 78 TAO_category_ids that corresponds to 78 COCO classes.
# (The other 2 COCO classes do not have corresponding classes in TAO).
self.knowns = {4, 13, 1038, 544, 1057, 34, 35, 36, 41, 45, 58, 60, 579, 1091, 1097, 1099, 78, 79, 81, 91, 1115,
1117, 95, 1122, 99, 1132, 621, 1135, 625, 118, 1144, 126, 642, 1155, 133, 1162, 139, 154, 174, 185,
699, 1215, 714, 717, 1229, 211, 729, 221, 229, 747, 235, 237, 779, 276, 805, 299, 829, 852, 347,
371, 382, 896, 392, 926, 937, 428, 429, 961, 452, 979, 980, 982, 475, 480, 993, 1001, 502, 1018}
# `distractors` is defined as in the paper "Opening up Open-World Tracking"
self.distractors = {20, 63, 108, 180, 188, 204, 212, 247, 303, 403, 407, 415, 490, 504, 507, 513, 529, 567,
569, 588, 672, 691, 702, 708, 711, 720, 736, 737, 798, 813, 815, 827, 831, 851, 877, 883,
912, 971, 976, 1130, 1133, 1134, 1169, 1184, 1220}
self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
def _filter_gt_data(self, raw_gt_data):
"""
Filter out irrelevant data in the raw_gt_data
Args:
raw_gt_data: directly loaded from json.
Returns:
filtered gt_data
"""
valid_cat_ids = list()
if self.subset == "known":
valid_cat_ids = self.knowns
elif self.subset == "distractor":
valid_cat_ids = self.distractors
elif self.subset == "unknown":
valid_cat_ids = self.unknowns
# elif self.subset == "test_only_unknowns":
# valid_cat_ids = test_only_unknowns
else:
raise Exception("The parameter `SUBSET` is incorrect")
filtered = dict()
filtered["videos"] = raw_gt_data["videos"]
# filtered["videos"] = list()
unwanted_vid = set()
# for video in raw_gt_data["videos"]:
# datasrc = video["name"].split('/')[1]
# if datasrc in data_srcs:
# filtered["videos"].append(video)
# else:
# unwanted_vid.add(video["id"])
filtered["annotations"] = list()
for ann in raw_gt_data["annotations"]:
if (ann["video_id"] not in unwanted_vid) and (ann["category_id"] in valid_cat_ids):
filtered["annotations"].append(ann)
filtered["tracks"] = list()
for track in raw_gt_data["tracks"]:
if (track["video_id"] not in unwanted_vid) and (track["category_id"] in valid_cat_ids):
filtered["tracks"].append(track)
filtered["images"] = list()
for image in raw_gt_data["images"]:
if image["video_id"] not in unwanted_vid:
filtered["images"].append(image)
filtered["categories"] = list()
for cat in raw_gt_data["categories"]:
if cat["id"] in valid_cat_ids:
filtered["categories"].append(cat)
filtered["info"] = raw_gt_data["info"]
filtered["licenses"] = raw_gt_data["licenses"]
return filtered
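# A hedged usage sketch for the class above, assuming the upstream TrackEval
# entry points (trackeval.Evaluator, trackeval.metrics.HOTA) are importable from
# wherever this module is vendored; paths and the tracker name are placeholders.
# Constructing the dataset reads GT_FOLDER from disk, so the calls stay commented.
# import trackeval
# ow_config = TAO_OW.get_default_dataset_config()
# ow_config['SUBSET'] = 'unknown'                 # evaluate only the unknown-category split
# ow_config['TRACKERS_TO_EVAL'] = ['my_tracker']  # hypothetical tracker folder name
# evaluator = trackeval.Evaluator(trackeval.Evaluator.get_default_eval_config())
# evaluator.evaluate([TAO_OW(ow_config)], [trackeval.metrics.HOTA()])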

View File

@@ -0,0 +1,438 @@
import os
import csv
import configparser
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_dataset import _BaseDataset
from .. import utils
from .. import _timing
from ..utils import TrackEvalException
class VisDrone2DBox(_BaseDataset):
"""Dataset class for MOT Challenge 2D bounding box tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/mot_challenge/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/mot_challenge/'), # Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'], # Valid: the ten VisDrone classes listed here
'BENCHMARK': 'MOT17', # Valid: 'MOT17', 'MOT16', 'MOT20', 'MOT15'
'SPLIT_TO_EVAL': 'train', # Valid: 'train', 'test', 'all'
'INPUT_AS_ZIP': False, # Whether tracker input files are zipped
'PRINT_CONFIG': True, # Whether to print current config
'DO_PREPROC': True, # Whether to perform preprocessing (never done for MOT15)
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
'SEQMAP_FOLDER': None, # Where seqmaps are found (if None, GT_FOLDER/seqmaps)
'SEQMAP_FILE': None, # Directly specify seqmap file (if none use seqmap_folder/benchmark-split_to_eval)
'SEQ_INFO': None, # If not None, directly specify sequences to eval and their number of timesteps
'GT_LOC_FORMAT': '{gt_folder}/{seq}/gt/gt.txt', # '{gt_folder}/{seq}/gt/gt.txt'
'SKIP_SPLIT_FOL': False, # If False, data is in GT_FOLDER/BENCHMARK-SPLIT_TO_EVAL/ and in
# TRACKERS_FOLDER/BENCHMARK-SPLIT_TO_EVAL/tracker/
# If True, then the middle 'benchmark-split' folder is skipped for both.
}
return default_config
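# A hedged example of overriding the defaults above for a VisDrone-style layout
# where gt files live directly under GT_FOLDER as <seq>.txt. All paths and the
# sequence name are placeholders, not values taken from this repo.
visdrone_cfg = {
    'GT_FOLDER': '/path/to/visdrone/annotations',
    'TRACKERS_FOLDER': '/path/to/tracker/results',
    'SKIP_SPLIT_FOL': True,                    # no BENCHMARK-SPLIT sub-folder in the paths
    'SEQ_INFO': {'uav_sequence_001': 100},     # seq name -> frame count (None = read seqinfo.ini)
    'GT_LOC_FORMAT': '{gt_folder}/{seq}.txt',
}
# dataset = VisDrone2DBox(visdrone_cfg)   # requires the gt/tracker files to exist on disk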
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.benchmark = self.config['BENCHMARK']
gt_set = self.config['BENCHMARK'] + '-' + self.config['SPLIT_TO_EVAL']
self.gt_set = gt_set
if not self.config['SKIP_SPLIT_FOL']:
split_fol = gt_set
else:
split_fol = ''
self.gt_fol = os.path.join(self.config['GT_FOLDER'], split_fol)
self.tracker_fol = os.path.join(self.config['TRACKERS_FOLDER'], split_fol)
self.should_classes_combine = False
self.use_super_categories = False
self.data_is_zipped = self.config['INPUT_AS_ZIP']
self.do_preproc = self.config['DO_PREPROC']
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
# Get classes to eval
self.valid_classes = ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor']
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only the following classes are valid: ' + ', '.join(self.valid_classes))
self.class_name_to_class_id = {'ignored': 0, 'pedestrian': 1, 'people': 2, 'bicycle': 3, 'car': 4, 'van': 5,
'truck': 6, 'tricycle': 7, 'awning-tricycle': 8, 'bus': 9,
'motor': 10, 'other': 11}
self.valid_class_numbers = list(self.class_name_to_class_id.values())
# Get sequences to eval and check gt files exist
self.seq_list, self.seq_lengths = self._get_seq_info()
if len(self.seq_list) < 1:
raise TrackEvalException('No sequences are selected to be evaluated.')
# Check gt files exist
for seq in self.seq_list:
if not self.data_is_zipped:
curr_file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found for sequence: ' + seq)
if self.data_is_zipped:
curr_file = os.path.join(self.gt_fol, 'data.zip')
if not os.path.isfile(curr_file):
print('GT file not found ' + curr_file)
raise TrackEvalException('GT file not found: ' + os.path.basename(curr_file))
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
for tracker in self.tracker_list:
if self.data_is_zipped:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException('Tracker file not found: ' + tracker + '/' + os.path.basename(curr_file))
else:
for seq in self.seq_list:
curr_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
if not os.path.isfile(curr_file):
print('Tracker file not found: ' + curr_file)
raise TrackEvalException(
'Tracker file not found: ' + tracker + '/' + self.tracker_sub_fol + '/' + os.path.basename(
curr_file))
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _get_seq_info(self):
seq_list = []
seq_lengths = {}
if self.config["SEQ_INFO"]:
seq_list = list(self.config["SEQ_INFO"].keys())
seq_lengths = self.config["SEQ_INFO"]
# If sequence length is None, try to read it from the seqinfo.ini file.
for seq, seq_length in seq_lengths.items():
if seq_length is None:
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
else:
if self.config["SEQMAP_FILE"]:
seqmap_file = self.config["SEQMAP_FILE"]
else:
if self.config["SEQMAP_FOLDER"] is None:
seqmap_file = os.path.join(self.config['GT_FOLDER'], 'seqmaps', self.gt_set + '.txt')
else:
seqmap_file = os.path.join(self.config["SEQMAP_FOLDER"], self.gt_set + '.txt')
if not os.path.isfile(seqmap_file):
print('no seqmap found: ' + seqmap_file)
raise TrackEvalException('no seqmap found: ' + os.path.basename(seqmap_file))
with open(seqmap_file) as fp:
reader = csv.reader(fp)
for i, row in enumerate(reader):
if i == 0 or row[0] == '':
continue
seq = row[0]
seq_list.append(seq)
ini_file = os.path.join(self.gt_fol, seq, 'seqinfo.ini')
if not os.path.isfile(ini_file):
raise TrackEvalException('ini file does not exist: ' + seq + '/' + os.path.basename(ini_file))
ini_data = configparser.ConfigParser()
ini_data.read(ini_file)
seq_lengths[seq] = int(ini_data['Sequence']['seqLength'])
return seq_list, seq_lengths
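# A hedged illustration of the fallback path above: when SEQ_INFO is not given,
# _get_seq_info() reads a seqmap text file (GT_FOLDER/seqmaps/<BENCHMARK>-<SPLIT>.txt
# by default). Its first line is treated as a header and every following non-empty
# line names one sequence, which must then provide a seqinfo.ini with its length.
# The sequence names below are placeholders.
example_seqmap_contents = "name\nuav_sequence_001\nuav_sequence_002\n"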
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the MOT Challenge 2D box format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
[gt_extras] : list (for each timestep) of dicts (for each extra) of 1D NDArrays (for each det).
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
"""
# File location
if self.data_is_zipped:
if is_gt:
zip_file = os.path.join(self.gt_fol, 'data.zip')
else:
zip_file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol + '.zip')
file = seq + '.txt'
else:
zip_file = None
if is_gt:
file = self.config["GT_LOC_FORMAT"].format(gt_folder=self.gt_fol, seq=seq)
else:
file = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol, seq + '.txt')
# Load raw data from text file
read_data, ignore_data = self._load_simple_text_file(file, is_zipped=self.data_is_zipped, zip_file=zip_file)
# Convert data to required format
num_timesteps = self.seq_lengths[seq]
data_keys = ['ids', 'classes', 'dets']
if is_gt:
data_keys += ['gt_crowd_ignore_regions', 'gt_extras']
else:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
# Check for any extra time keys
current_time_keys = [str(t + 1) for t in range(num_timesteps)]
extra_time_keys = [x for x in read_data.keys() if x not in current_time_keys]
if len(extra_time_keys) > 0:
if is_gt:
text = 'Ground-truth'
else:
text = 'Tracking'
raise TrackEvalException(
text + ' data contains the following invalid timesteps in seq %s: ' % seq + ', '.join(
[str(x) for x in extra_time_keys]))
for t in range(num_timesteps):
time_key = str(t+1)
if time_key in read_data.keys():
try:
time_data = np.asarray(read_data[time_key], dtype=float)
except ValueError:
if is_gt:
raise TrackEvalException(
'Cannot convert gt data for sequence %s to float. Is data corrupted?' % seq)
else:
raise TrackEvalException(
'Cannot convert tracking data from tracker %s, sequence %s to float. Is data corrupted?' % (
tracker, seq))
try:
raw_data['dets'][t] = np.atleast_2d(time_data[:, 2:6])
raw_data['ids'][t] = np.atleast_1d(time_data[:, 1]).astype(int)
except IndexError:
if is_gt:
err = 'Cannot load gt data from sequence %s, because there are not enough ' \
'columns in the data.' % seq
raise TrackEvalException(err)
else:
err = 'Cannot load tracker data from tracker %s, sequence %s, because there are not enough ' \
'columns in the data.' % (tracker, seq)
raise TrackEvalException(err)
if time_data.shape[1] >= 8:
raw_data['classes'][t] = np.atleast_1d(time_data[:, 7]).astype(int)
else:
if not is_gt:
raw_data['classes'][t] = np.ones_like(raw_data['ids'][t])
else:
raise TrackEvalException(
'GT data is not in a valid format, there are not enough columns in seq %s, timestep %i.' % (
seq, t))
if is_gt:
gt_extras_dict = {'zero_marked': np.atleast_1d(time_data[:, 6].astype(int))}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.atleast_1d(time_data[:, 6])
else:
raw_data['dets'][t] = np.empty((0, 4))
raw_data['ids'][t] = np.empty(0).astype(int)
raw_data['classes'][t] = np.empty(0).astype(int)
if is_gt:
gt_extras_dict = {'zero_marked': np.empty(0)}
raw_data['gt_extras'][t] = gt_extras_dict
else:
raw_data['tracker_confidences'][t] = np.empty(0)
if is_gt:
raw_data['gt_crowd_ignore_regions'][t] = np.empty((0, 4))
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
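# A hedged illustration of the column layout parsed above (indices as used by
# the code): 0 frame, 1 id, 2-5 bbox (x, y, w, h), 6 confidence for tracker
# files / zero_marked flag for gt files, 7 class id (if present). The values
# themselves are made up, not taken from any sequence in this repo.
example_gt_line = '1,3,794,247,71,174,1,4,1,0'     # gt: valid box of class 4 (car)
example_trk_line = '1,3,794,247,71,174,0.92,4'     # tracker: same box, confidence 0.92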
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
VisDrone (MOT Challenge file format):
In this adaptation the 4 preproc steps are as follows:
1) Each of the ten VisDrone classes is evaluated separately, but all classes are used for preproc.
2) Predictions are matched against all gt boxes (regardless of class); those matching with distractor
objects (the 'ignored' and 'other' classes) are removed.
3) There are no crowd ignore regions.
4) All gt dets not belonging to the evaluated class are removed, as are gt dets marked as zero_marked.
"""
# Check that input data has unique ids
self._check_unique_ids(raw_data)
# distractor_class_names = ['person_on_vehicle', 'static_person', 'distractor', 'reflection']
distractor_class_names = ['ignored', 'other']
if self.benchmark == 'MOT20':
distractor_class_names.append('non_mot_vehicle')
distractor_classes = [self.class_name_to_class_id[x] for x in distractor_class_names]
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'tracker_confidences', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Get all data
gt_ids = raw_data['gt_ids'][t]
gt_dets = raw_data['gt_dets'][t]
gt_classes = raw_data['gt_classes'][t]
gt_zero_marked = raw_data['gt_extras'][t]['zero_marked']
tracker_ids = raw_data['tracker_ids'][t]
tracker_dets = raw_data['tracker_dets'][t]
tracker_classes = raw_data['tracker_classes'][t]
tracker_confidences = raw_data['tracker_confidences'][t]
similarity_scores = raw_data['similarity_scores'][t]
# Evaluation is only valid for the class ids defined for this dataset
if len(tracker_classes) > 0 and np.max(tracker_classes) > np.max(self.valid_class_numbers):
raise TrackEvalException(
'Invalid class id (%i) found in sequence %s at '
'timestep %i.' % (np.max(tracker_classes), raw_data['seq'], t))
# Match tracker and gt dets (with hungarian algorithm) and remove tracker dets which match with gt dets
# which are labeled as belonging to a distractor class.
to_remove_tracker = np.array([], int)
if self.do_preproc and self.benchmark != 'MOT15' and gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
# Check all classes are valid:
invalid_classes = np.setdiff1d(np.unique(gt_classes), self.valid_class_numbers)
if len(invalid_classes) > 0:
print(' '.join([str(x) for x in invalid_classes]))
raise(TrackEvalException('Attempting to evaluate using invalid gt classes. '
'This warning only triggers if preprocessing is performed, '
'e.g. not for MOT15 or where preprocessing is explicitly disabled. '
'Please either check your gt data, or disable preprocessing. '
'The following invalid classes were found in timestep ' + str(t) + ': ' +
' '.join([str(x) for x in invalid_classes])))
matching_scores = similarity_scores.copy()
matching_scores[matching_scores < 0.5 - np.finfo('float').eps] = 0
match_rows, match_cols = linear_sum_assignment(-matching_scores)
actually_matched_mask = matching_scores[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
is_distractor_class = np.isin(gt_classes[match_rows], distractor_classes)
to_remove_tracker = match_cols[is_distractor_class]
# Apply preprocessing to remove all unwanted tracker dets.
data['tracker_ids'][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
data['tracker_dets'][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
data['tracker_confidences'][t] = np.delete(tracker_confidences, to_remove_tracker, axis=0)
similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
# Remove gt detections marked as to remove (zero marked), and also remove gt detections not in the
# evaluated class (not applicable for MOT15)
if self.do_preproc and self.benchmark != 'MOT15':
gt_to_keep_mask = (np.not_equal(gt_zero_marked, 0)) & \
(np.equal(gt_classes, cls_id))
else:
# There are no classes for MOT15
gt_to_keep_mask = np.not_equal(gt_zero_marked, 0)
data['gt_ids'][t] = gt_ids[gt_to_keep_mask]
data['gt_dets'][t] = gt_dets[gt_to_keep_mask, :]
data['similarity_scores'][t] = similarity_scores[gt_to_keep_mask]
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# Ensure again that ids are unique per timestep after preproc.
self._check_unique_ids(data, after_preproc=True)
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t, box_format='xywh')
return similarity_scores
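# A hedged, self-contained sketch of what the 'xywh' box IoU used above computes
# for a single pair of boxes; the actual vectorised implementation is inherited
# from the base dataset class and is not reproduced here.
def xywh_iou(box_a, box_b):
    """IoU of two [x, y, w, h] boxes given as top-left corner plus size."""
    ax1, ay1, aw, ah = box_a
    bx1, by1, bw, bh = box_b
    inter_w = max(0.0, min(ax1 + aw, bx1 + bw) - max(ax1, bx1))
    inter_h = max(0.0, min(ay1 + ah, by1 + bh) - max(ay1, by1))
    inter = inter_w * inter_h
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0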

View File

@@ -0,0 +1,364 @@
import os
import numpy as np
import json
from ._base_dataset import _BaseDataset
from ..utils import TrackEvalException
from .. import utils
from .. import _timing
class YouTubeVIS(_BaseDataset):
"""Dataset class for YouTubeVIS tracking"""
@staticmethod
def get_default_dataset_config():
"""Default class config values"""
code_path = utils.get_code_path()
default_config = {
'GT_FOLDER': os.path.join(code_path, 'data/gt/youtube_vis/'), # Location of GT data
'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/youtube_vis/'),
# Trackers location
'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
'SPLIT_TO_EVAL': 'train_sub_split', # Valid: 'train', 'val', 'train_sub_split'
'PRINT_CONFIG': True, # Whether to print current config
'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
}
return default_config
def __init__(self, config=None):
"""Initialise dataset, checking that all required files are present"""
super().__init__()
# Fill non-given config values with defaults
self.config = utils.init_config(config, self.get_default_dataset_config(), self.get_name())
self.gt_fol = self.config['GT_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
self.tracker_fol = self.config['TRACKERS_FOLDER'] + 'youtube_vis_' + self.config['SPLIT_TO_EVAL']
self.use_super_categories = False
self.should_classes_combine = True
self.output_fol = self.config['OUTPUT_FOLDER']
if self.output_fol is None:
self.output_fol = self.tracker_fol
self.output_sub_fol = self.config['OUTPUT_SUB_FOLDER']
self.tracker_sub_fol = self.config['TRACKER_SUB_FOLDER']
if not os.path.exists(self.gt_fol):
print("GT folder not found: " + self.gt_fol)
raise TrackEvalException("GT folder not found: " + os.path.basename(self.gt_fol))
gt_dir_files = [file for file in os.listdir(self.gt_fol) if file.endswith('.json')]
if len(gt_dir_files) != 1:
raise TrackEvalException(self.gt_fol + ' does not contain exactly one json file.')
with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
self.gt_data = json.load(f)
# Get classes to eval
self.valid_classes = [cls['name'] for cls in self.gt_data['categories']]
cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
if self.config['CLASSES_TO_EVAL']:
self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
for cls in self.config['CLASSES_TO_EVAL']]
if not all(self.class_list):
raise TrackEvalException('Attempted to evaluate an invalid class. Only classes ' +
', '.join(self.valid_classes) + ' are valid.')
else:
self.class_list = [cls['name'] for cls in self.gt_data['categories']]
self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
# Get sequences to eval and check gt files exist
self.seq_list = [vid['file_names'][0].split('/')[0] for vid in self.gt_data['videos']]
self.seq_name_to_seq_id = {vid['file_names'][0].split('/')[0]: vid['id'] for vid in self.gt_data['videos']}
self.seq_lengths = {vid['id']: len(vid['file_names']) for vid in self.gt_data['videos']}
# encode masks and compute track areas
self._prepare_gt_annotations()
# Get trackers to eval
if self.config['TRACKERS_TO_EVAL'] is None:
self.tracker_list = os.listdir(self.tracker_fol)
else:
self.tracker_list = self.config['TRACKERS_TO_EVAL']
if self.config['TRACKER_DISPLAY_NAMES'] is None:
self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
elif (self.config['TRACKERS_TO_EVAL'] is not None) and (
len(self.config['TRACKER_DISPLAY_NAMES']) == len(self.tracker_list)):
self.tracker_to_disp = dict(zip(self.tracker_list, self.config['TRACKER_DISPLAY_NAMES']))
else:
raise TrackEvalException('List of tracker files and tracker display names do not match.')
# counter for globally unique track IDs
self.global_tid_counter = 0
self.tracker_data = dict()
for tracker in self.tracker_list:
tracker_dir_path = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
tr_dir_files = [file for file in os.listdir(tracker_dir_path) if file.endswith('.json')]
if len(tr_dir_files) != 1:
raise TrackEvalException(tracker_dir_path + ' does not contain exactly one json file.')
with open(os.path.join(tracker_dir_path, tr_dir_files[0])) as f:
curr_data = json.load(f)
self.tracker_data[tracker] = curr_data
def get_display_name(self, tracker):
return self.tracker_to_disp[tracker]
def _load_raw_file(self, tracker, seq, is_gt):
"""Load a file (gt or tracker) in the YouTubeVIS format
If is_gt, this returns a dict which contains the fields:
[gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
[gt_dets]: list (for each timestep) of lists of detections.
[classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_iscrowd]: dictionary with class values
as keys and lists (for each track) as values
if not is_gt, this returns a dict which contains the fields:
[tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
[tracker_dets]: list (for each timestep) of lists of detections.
[classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
keys and corresponding segmentations as values) for each track
[classes_to_dt_track_ids, classes_to_dt_track_areas]: dictionary with class values as keys and lists as values
[classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
"""
# select sequence tracks
seq_id = self.seq_name_to_seq_id[seq]
if is_gt:
tracks = [ann for ann in self.gt_data['annotations'] if ann['video_id'] == seq_id]
else:
tracks = self._get_tracker_seq_tracks(tracker, seq_id)
# Convert data to required format
num_timesteps = self.seq_lengths[seq_id]
data_keys = ['ids', 'classes', 'dets']
if not is_gt:
data_keys += ['tracker_confidences']
raw_data = {key: [None] * num_timesteps for key in data_keys}
for t in range(num_timesteps):
raw_data['dets'][t] = [track['segmentations'][t] for track in tracks if track['segmentations'][t]]
raw_data['ids'][t] = np.atleast_1d([track['id'] for track in tracks
if track['segmentations'][t]]).astype(int)
raw_data['classes'][t] = np.atleast_1d([track['category_id'] for track in tracks
if track['segmentations'][t]]).astype(int)
if not is_gt:
raw_data['tracker_confidences'][t] = np.atleast_1d([track['score'] for track in tracks
if track['segmentations'][t]]).astype(float)
if is_gt:
key_map = {'ids': 'gt_ids',
'classes': 'gt_classes',
'dets': 'gt_dets'}
else:
key_map = {'ids': 'tracker_ids',
'classes': 'tracker_classes',
'dets': 'tracker_dets'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
all_cls_ids = {self.class_name_to_class_id[cls] for cls in self.class_list}
classes_to_tracks = {cls: [track for track in tracks if track['category_id'] == cls] for cls in all_cls_ids}
# mapping from classes to track representations and track information
raw_data['classes_to_tracks'] = {cls: [{i: track['segmentations'][i]
for i in range(len(track['segmentations']))} for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_ids'] = {cls: [track['id'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
raw_data['classes_to_track_areas'] = {cls: [track['area'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
if is_gt:
raw_data['classes_to_gt_track_iscrowd'] = {cls: [track['iscrowd'] for track in tracks]
for cls, tracks in classes_to_tracks.items()}
else:
raw_data['classes_to_dt_track_scores'] = {cls: np.array([track['score'] for track in tracks])
for cls, tracks in classes_to_tracks.items()}
if is_gt:
key_map = {'classes_to_tracks': 'classes_to_gt_tracks',
'classes_to_track_ids': 'classes_to_gt_track_ids',
'classes_to_track_areas': 'classes_to_gt_track_areas'}
else:
key_map = {'classes_to_tracks': 'classes_to_dt_tracks',
'classes_to_track_ids': 'classes_to_dt_track_ids',
'classes_to_track_areas': 'classes_to_dt_track_areas'}
for k, v in key_map.items():
raw_data[v] = raw_data.pop(k)
raw_data['num_timesteps'] = num_timesteps
raw_data['seq'] = seq
return raw_data
@_timing.time
def get_preprocessed_seq_data(self, raw_data, cls):
""" Preprocess data for a single sequence for a single class ready for evaluation.
Inputs:
- raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
- cls is the class to be evaluated.
Outputs:
- data is a dict containing all of the information that metrics need to perform evaluation.
It contains the following fields:
[num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
[gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
[gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
[similarity_scores]: list (for each timestep) of 2D NDArrays.
Notes:
General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
1) Extract only detections relevant for the class to be evaluated (including distractor detections).
2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
distractor class, or otherwise marked as to be removed.
3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
other criteria (e.g. are too small).
4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
After the above preprocessing steps, this function also calculates the number of gt and tracker detections
and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
unique within each timestep.
YouTubeVIS:
In YouTubeVIS, the 4 preproc steps are as follows:
1) There are 40 classes which are evaluated separately.
2) No matched tracker dets are removed.
3) No unmatched tracker dets are removed.
4) No gt dets are removed.
Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
and the tracks from the tracker data are sorted according to the tracker confidence.
"""
cls_id = self.class_name_to_class_id[cls]
data_keys = ['gt_ids', 'tracker_ids', 'gt_dets', 'tracker_dets', 'similarity_scores']
data = {key: [None] * raw_data['num_timesteps'] for key in data_keys}
unique_gt_ids = []
unique_tracker_ids = []
num_gt_dets = 0
num_tracker_dets = 0
for t in range(raw_data['num_timesteps']):
# Only extract relevant dets for this class for eval (cls)
gt_class_mask = np.atleast_1d(raw_data['gt_classes'][t] == cls_id)
gt_class_mask = gt_class_mask.astype(bool)
gt_ids = raw_data['gt_ids'][t][gt_class_mask]
gt_dets = [raw_data['gt_dets'][t][ind] for ind in range(len(gt_class_mask)) if gt_class_mask[ind]]
tracker_class_mask = np.atleast_1d(raw_data['tracker_classes'][t] == cls_id)
tracker_class_mask = tracker_class_mask.astype(bool)
tracker_ids = raw_data['tracker_ids'][t][tracker_class_mask]
tracker_dets = [raw_data['tracker_dets'][t][ind] for ind in range(len(tracker_class_mask)) if
tracker_class_mask[ind]]
similarity_scores = raw_data['similarity_scores'][t][gt_class_mask, :][:, tracker_class_mask]
data['tracker_ids'][t] = tracker_ids
data['tracker_dets'][t] = tracker_dets
data['gt_ids'][t] = gt_ids
data['gt_dets'][t] = gt_dets
data['similarity_scores'][t] = similarity_scores
unique_gt_ids += list(np.unique(data['gt_ids'][t]))
unique_tracker_ids += list(np.unique(data['tracker_ids'][t]))
num_tracker_dets += len(data['tracker_ids'][t])
num_gt_dets += len(data['gt_ids'][t])
# Re-label IDs such that there are no empty IDs
if len(unique_gt_ids) > 0:
unique_gt_ids = np.unique(unique_gt_ids)
gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
for t in range(raw_data['num_timesteps']):
if len(data['gt_ids'][t]) > 0:
data['gt_ids'][t] = gt_id_map[data['gt_ids'][t]].astype(int)
if len(unique_tracker_ids) > 0:
unique_tracker_ids = np.unique(unique_tracker_ids)
tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
for t in range(raw_data['num_timesteps']):
if len(data['tracker_ids'][t]) > 0:
data['tracker_ids'][t] = tracker_id_map[data['tracker_ids'][t]].astype(int)
# Ensure that ids are unique per timestep.
self._check_unique_ids(data)
# Record overview statistics.
data['num_tracker_dets'] = num_tracker_dets
data['num_gt_dets'] = num_gt_dets
data['num_tracker_ids'] = len(unique_tracker_ids)
data['num_gt_ids'] = len(unique_gt_ids)
data['num_timesteps'] = raw_data['num_timesteps']
data['seq'] = raw_data['seq']
# get track representations
data['gt_tracks'] = raw_data['classes_to_gt_tracks'][cls_id]
data['gt_track_ids'] = raw_data['classes_to_gt_track_ids'][cls_id]
data['gt_track_areas'] = raw_data['classes_to_gt_track_areas'][cls_id]
data['gt_track_iscrowd'] = raw_data['classes_to_gt_track_iscrowd'][cls_id]
data['dt_tracks'] = raw_data['classes_to_dt_tracks'][cls_id]
data['dt_track_ids'] = raw_data['classes_to_dt_track_ids'][cls_id]
data['dt_track_areas'] = raw_data['classes_to_dt_track_areas'][cls_id]
data['dt_track_scores'] = raw_data['classes_to_dt_track_scores'][cls_id]
data['iou_type'] = 'mask'
# sort tracker data tracks by tracker confidence scores
if data['dt_tracks']:
idx = np.argsort([-score for score in data['dt_track_scores']], kind="mergesort")
data['dt_track_scores'] = [data['dt_track_scores'][i] for i in idx]
data['dt_tracks'] = [data['dt_tracks'][i] for i in idx]
data['dt_track_ids'] = [data['dt_track_ids'][i] for i in idx]
data['dt_track_areas'] = [data['dt_track_areas'][i] for i in idx]
return data
def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
similarity_scores = self._calculate_mask_ious(gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False)
return similarity_scores
def _prepare_gt_annotations(self):
"""
Prepares GT data by RLE-encoding segmentations and computing the average track area.
:return: None
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
for track in self.gt_data['annotations']:
h = track['height']
w = track['width']
for i, seg in enumerate(track['segmentations']):
if seg:
track['segmentations'][i] = mask_utils.frPyObjects(seg, h, w)
areas = [a for a in track['areas'] if a]
if len(areas) == 0:
track['area'] = 0
else:
track['area'] = np.array(areas).mean()
def _get_tracker_seq_tracks(self, tracker, seq_id):
"""
Prepares tracker data for a given sequence. Extracts all annotations for given sequence ID, computes
average track area and assigns a track ID.
:param tracker: the given tracker
:param seq_id: the sequence ID
:return: the extracted tracks
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
tracks = [ann for ann in self.tracker_data[tracker] if ann['video_id'] == seq_id]
for track in tracks:
track['areas'] = []
for seg in track['segmentations']:
if seg:
track['areas'].append(mask_utils.area(seg))
else:
track['areas'].append(None)
areas = [a for a in track['areas'] if a]
if len(areas) == 0:
track['area'] = 0
else:
track['area'] = np.array(areas).mean()
track['id'] = self.global_tid_counter
self.global_tid_counter += 1
return tracks
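# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# _get_tracker_seq_tracks() above reads 'video_id', 'category_id', 'score' and per-frame
# 'segmentations' from each tracker JSON entry; the layout below is an assumption derived
# from that code, with invented values and a truncated RLE string.
_example_tracker_entry = {
    'video_id': 1,                 # matched against seq_name_to_seq_id
    'category_id': 0,              # matched against class_name_to_class_id
    'score': 0.87,                 # per-track confidence, used for sorting dt tracks
    'segmentations': [None, {'size': [720, 1280], 'counts': '...'}],  # one RLE (or None) per frame
}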

View File

@@ -0,0 +1,225 @@
import time
import traceback
from multiprocessing.pool import Pool
from functools import partial
import os
from . import utils
from .utils import TrackEvalException
from . import _timing
from .metrics import Count
try:
import tqdm
TQDM_IMPORTED = True
except ImportError as _:
TQDM_IMPORTED = False
class Evaluator:
"""Evaluator class for evaluating different metrics for different datasets"""
@staticmethod
def get_default_eval_config():
"""Returns the default config values for evaluation"""
code_path = utils.get_code_path()
default_config = {
'USE_PARALLEL': False,
'NUM_PARALLEL_CORES': 8,
'BREAK_ON_ERROR': True, # Raises exception and exits with error
'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error
'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file.
'PRINT_RESULTS': True,
'PRINT_ONLY_COMBINED': False,
'PRINT_CONFIG': True,
'TIME_PROGRESS': True,
'DISPLAY_LESS_PROGRESS': True,
'OUTPUT_SUMMARY': True,
'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections
'OUTPUT_DETAILED': True,
'PLOT_CURVES': True,
}
return default_config
def __init__(self, config=None):
"""Initialise the evaluator with a config file"""
self.config = utils.init_config(config, self.get_default_eval_config(), 'Eval')
# Only run timing analysis if not run in parallel.
if self.config['TIME_PROGRESS'] and not self.config['USE_PARALLEL']:
_timing.DO_TIMING = True
if self.config['DISPLAY_LESS_PROGRESS']:
_timing.DISPLAY_LESS_PROGRESS = True
@_timing.time
def evaluate(self, dataset_list, metrics_list, show_progressbar=False):
"""Evaluate a set of metrics on a set of datasets"""
config = self.config
metrics_list = metrics_list + [Count()] # Count metrics are always run
metric_names = utils.validate_metrics_list(metrics_list)
dataset_names = [dataset.get_name() for dataset in dataset_list]
output_res = {}
output_msg = {}
for dataset, dataset_name in zip(dataset_list, dataset_names):
# Get dataset info about what to evaluate
output_res[dataset_name] = {}
output_msg[dataset_name] = {}
tracker_list, seq_list, class_list = dataset.get_eval_info()
print('\nEvaluating %i tracker(s) on %i sequence(s) for %i class(es) on %s dataset using the following '
'metrics: %s\n' % (len(tracker_list), len(seq_list), len(class_list), dataset_name,
', '.join(metric_names)))
# Evaluate each tracker
for tracker in tracker_list:
# if not config['BREAK_ON_ERROR'] then go to next tracker without breaking
try:
# Evaluate each sequence in parallel or in series.
# returns a nested dict (res), indexed like: res[seq][class][metric_name][sub_metric field]
# e.g. res[seq_0001][pedestrian][hota][DetA]
print('\nEvaluating %s\n' % tracker)
time_start = time.time()
if config['USE_PARALLEL']:
if show_progressbar and TQDM_IMPORTED:
seq_list_sorted = sorted(seq_list)
with Pool(config['NUM_PARALLEL_CORES']) as pool, tqdm.tqdm(total=len(seq_list)) as pbar:
_eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
class_list=class_list, metrics_list=metrics_list,
metric_names=metric_names)
results = []
for r in pool.imap(_eval_sequence, seq_list_sorted,
chunksize=20):
results.append(r)
pbar.update()
res = dict(zip(seq_list_sorted, results))
else:
with Pool(config['NUM_PARALLEL_CORES']) as pool:
_eval_sequence = partial(eval_sequence, dataset=dataset, tracker=tracker,
class_list=class_list, metrics_list=metrics_list,
metric_names=metric_names)
results = pool.map(_eval_sequence, seq_list)
res = dict(zip(seq_list, results))
else:
res = {}
if show_progressbar and TQDM_IMPORTED:
seq_list_sorted = sorted(seq_list)
for curr_seq in tqdm.tqdm(seq_list_sorted):
res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
metric_names)
else:
for curr_seq in sorted(seq_list):
res[curr_seq] = eval_sequence(curr_seq, dataset, tracker, class_list, metrics_list,
metric_names)
# Combine results over all sequences and then over all classes
# collecting combined cls keys (cls averaged, det averaged, super classes)
combined_cls_keys = []
res['COMBINED_SEQ'] = {}
# combine sequences for each class
for c_cls in class_list:
res['COMBINED_SEQ'][c_cls] = {}
for metric, metric_name in zip(metrics_list, metric_names):
curr_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value in res.items() if
seq_key != 'COMBINED_SEQ'}
res['COMBINED_SEQ'][c_cls][metric_name] = metric.combine_sequences(curr_res)
# combine classes
if dataset.should_classes_combine:
combined_cls_keys += ['cls_comb_cls_av', 'cls_comb_det_av', 'all']
res['COMBINED_SEQ']['cls_comb_cls_av'] = {}
res['COMBINED_SEQ']['cls_comb_det_av'] = {}
for metric, metric_name in zip(metrics_list, metric_names):
cls_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
res['COMBINED_SEQ'].items() if cls_key not in combined_cls_keys}
res['COMBINED_SEQ']['cls_comb_cls_av'][metric_name] = \
metric.combine_classes_class_averaged(cls_res)
res['COMBINED_SEQ']['cls_comb_det_av'][metric_name] = \
metric.combine_classes_det_averaged(cls_res)
# combine classes to super classes
if dataset.use_super_categories:
for cat, sub_cats in dataset.super_categories.items():
combined_cls_keys.append(cat)
res['COMBINED_SEQ'][cat] = {}
for metric, metric_name in zip(metrics_list, metric_names):
cat_res = {cls_key: cls_value[metric_name] for cls_key, cls_value in
res['COMBINED_SEQ'].items() if cls_key in sub_cats}
res['COMBINED_SEQ'][cat][metric_name] = metric.combine_classes_det_averaged(cat_res)
# Print and output results in various formats
if config['TIME_PROGRESS']:
print('\nAll sequences for %s finished in %.2f seconds' % (tracker, time.time() - time_start))
output_fol = dataset.get_output_fol(tracker)
tracker_display_name = dataset.get_display_name(tracker)
for c_cls in res['COMBINED_SEQ'].keys(): # class_list + combined classes if calculated
summaries = []
details = []
num_dets = res['COMBINED_SEQ'][c_cls]['Count']['Dets']
if config['OUTPUT_EMPTY_CLASSES'] or num_dets > 0:
for metric, metric_name in zip(metrics_list, metric_names):
# for combined classes there is no per sequence evaluation
if c_cls in combined_cls_keys:
table_res = {'COMBINED_SEQ': res['COMBINED_SEQ'][c_cls][metric_name]}
else:
table_res = {seq_key: seq_value[c_cls][metric_name] for seq_key, seq_value
in res.items()}
if config['PRINT_RESULTS'] and config['PRINT_ONLY_COMBINED']:
dont_print = dataset.should_classes_combine and c_cls not in combined_cls_keys
if not dont_print:
metric.print_table({'COMBINED_SEQ': table_res['COMBINED_SEQ']},
tracker_display_name, c_cls)
elif config['PRINT_RESULTS']:
metric.print_table(table_res, tracker_display_name, c_cls)
if config['OUTPUT_SUMMARY']:
summaries.append(metric.summary_results(table_res))
if config['OUTPUT_DETAILED']:
details.append(metric.detailed_results(table_res))
if config['PLOT_CURVES']:
metric.plot_single_tracker_results(table_res, tracker_display_name, c_cls,
output_fol)
if config['OUTPUT_SUMMARY']:
utils.write_summary_results(summaries, c_cls, output_fol)
if config['OUTPUT_DETAILED']:
utils.write_detailed_results(details, c_cls, output_fol)
# Output for returning from function
output_res[dataset_name][tracker] = res
output_msg[dataset_name][tracker] = 'Success'
except Exception as err:
output_res[dataset_name][tracker] = None
if type(err) == TrackEvalException:
output_msg[dataset_name][tracker] = str(err)
else:
output_msg[dataset_name][tracker] = 'Unknown error occurred.'
print('Tracker %s was unable to be evaluated.' % tracker)
print(err)
traceback.print_exc()
if config['LOG_ON_ERROR'] is not None:
with open(config['LOG_ON_ERROR'], 'a') as f:
print(dataset_name, file=f)
print(tracker, file=f)
print(traceback.format_exc(), file=f)
print('\n\n\n', file=f)
if config['BREAK_ON_ERROR']:
raise err
elif config['RETURN_ON_ERROR']:
return output_res, output_msg
return output_res, output_msg
@_timing.time
def eval_sequence(seq, dataset, tracker, class_list, metrics_list, metric_names):
"""Function for evaluating a single sequence"""
raw_data = dataset.get_raw_seq_data(tracker, seq)
seq_res = {}
for cls in class_list:
seq_res[cls] = {}
data = dataset.get_preprocessed_seq_data(raw_data, cls)
for metric, met_name in zip(metrics_list, metric_names):
seq_res[cls][met_name] = metric.eval_sequence(data)
return seq_res
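# --- Illustrative usage sketch (added for clarity; not part of the upstream file) ---
# A minimal driver for the Evaluator above. `my_dataset` is a placeholder for any dataset
# object exposing the interface used by evaluate() (e.g. the loader earlier in this commit);
# the metric classes are re-exported by the metrics package below.
def _example_run(my_dataset):
    from .metrics import HOTA, CLEAR, Identity
    config = Evaluator.get_default_eval_config()
    config['USE_PARALLEL'] = False
    evaluator = Evaluator(config)
    metrics_list = [HOTA(), CLEAR(), Identity()]
    # evaluate() returns (output_res, output_msg); output_res is nested as
    # output_res[dataset_name][tracker][seq][class][metric_name]
    return evaluator.evaluate([my_dataset], metrics_list)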

View File

@@ -0,0 +1,8 @@
from .hota import HOTA
from .clear import CLEAR
from .identity import Identity
from .count import Count
from .j_and_f import JAndF
from .track_map import TrackMAP
from .vace import VACE
from .ideucl import IDEucl

View File

@@ -0,0 +1,133 @@
import numpy as np
from abc import ABC, abstractmethod
from .. import _timing
from ..utils import TrackEvalException
class _BaseMetric(ABC):
@abstractmethod
def __init__(self):
self.plottable = False
self.integer_fields = []
self.float_fields = []
self.array_labels = []
self.integer_array_fields = []
self.float_array_fields = []
self.fields = []
self.summary_fields = []
self.registered = False
#####################################################################
# Abstract functions for subclasses to implement
@_timing.time
@abstractmethod
def eval_sequence(self, data):
...
@abstractmethod
def combine_sequences(self, all_res):
...
@abstractmethod
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
...
@abstractmethod
def combine_classes_det_averaged(self, all_res):
...
def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
"""Plot results of metrics, only valid for metrics with self.plottable"""
if self.plottable:
raise NotImplementedError('plot_results is not implemented for metric %s' % self.get_name())
else:
pass
#####################################################################
# Helper functions which are useful for all metrics:
@classmethod
def get_name(cls):
return cls.__name__
@staticmethod
def _combine_sum(all_res, field):
"""Combine sequence results via sum"""
return sum([all_res[k][field] for k in all_res.keys()])
@staticmethod
def _combine_weighted_av(all_res, field, comb_res, weight_field):
"""Combine sequence results via weighted average"""
return sum([all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]) / np.maximum(1.0, comb_res[
weight_field])
def print_table(self, table_res, tracker, cls):
"""Prints table of results for all sequences"""
print('')
metric_name = self.get_name()
self._row_print([metric_name + ': ' + tracker + '-' + cls] + self.summary_fields)
for seq, results in sorted(table_res.items()):
if seq == 'COMBINED_SEQ':
continue
summary_res = self._summary_row(results)
self._row_print([seq] + summary_res)
summary_res = self._summary_row(table_res['COMBINED_SEQ'])
self._row_print(['COMBINED'] + summary_res)
def _summary_row(self, results_):
vals = []
for h in self.summary_fields:
if h in self.float_array_fields:
vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
elif h in self.float_fields:
vals.append("{0:1.5g}".format(100 * float(results_[h])))
elif h in self.integer_fields:
vals.append("{0:d}".format(int(results_[h])))
else:
raise NotImplementedError("Summary function not implemented for this field type.")
return vals
@staticmethod
def _row_print(*argv):
"""Prints results in an evenly spaced rows, with more space in first row"""
if len(argv) == 1:
argv = argv[0]
to_print = '%-35s' % argv[0]
for v in argv[1:]:
to_print += '%-10s' % str(v)
print(to_print)
def summary_results(self, table_res):
"""Returns a simple summary of final results for a tracker"""
return dict(zip(self.summary_fields, self._summary_row(table_res['COMBINED_SEQ'])))
def detailed_results(self, table_res):
"""Returns detailed final results for a tracker"""
# Get detailed field information
detailed_fields = self.float_fields + self.integer_fields
for h in self.float_array_fields + self.integer_array_fields:
for alpha in [int(100*x) for x in self.array_labels]:
detailed_fields.append(h + '___' + str(alpha))
detailed_fields.append(h + '___AUC')
# Get detailed results
detailed_results = {}
for seq, res in table_res.items():
detailed_row = self._detailed_row(res)
if len(detailed_row) != len(detailed_fields):
raise TrackEvalException(
'Field names and data have different sizes (%i and %i)' % (len(detailed_row), len(detailed_fields)))
detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
return detailed_results
def _detailed_row(self, res):
detailed_row = []
for h in self.float_fields + self.integer_fields:
detailed_row.append(res[h])
for h in self.float_array_fields + self.integer_array_fields:
for i, alpha in enumerate([int(100 * x) for x in self.array_labels]):
detailed_row.append(res[h][i])
detailed_row.append(np.mean(res[h]))
return detailed_row
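# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# A minimal concrete metric showing what the abstract interface above requires; the field
# names are invented and the combination rules simply sum over sequences/classes.
class _ExampleDetCount(_BaseMetric):
    def __init__(self):
        super().__init__()
        self.integer_fields = ['ExDets', 'ExGTDets']
        self.fields = self.integer_fields
        self.summary_fields = self.fields

    @_timing.time
    def eval_sequence(self, data):
        # count detections in the preprocessed sequence data
        return {'ExDets': data['num_tracker_dets'], 'ExGTDets': data['num_gt_dets']}

    def combine_sequences(self, all_res):
        return {field: self._combine_sum(all_res, field) for field in self.fields}

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
        return {field: self._combine_sum(all_res, field) for field in self.fields}

    def combine_classes_det_averaged(self, all_res):
        return {field: self._combine_sum(all_res, field) for field in self.fields}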

View File

@@ -0,0 +1,186 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils
class CLEAR(_BaseMetric):
"""Class which implements the CLEAR metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.5, # Similarity score threshold required for a TP match. Default 0.5.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
main_integer_fields = ['CLR_TP', 'CLR_FN', 'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag']
extra_integer_fields = ['CLR_Frames']
self.integer_fields = main_integer_fields + extra_integer_fields
main_float_fields = ['MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'sMOTA']
extra_float_fields = ['CLR_F1', 'FP_per_frame', 'MOTAL', 'MOTP_sum']
self.float_fields = main_float_fields + extra_float_fields
self.fields = self.float_fields + self.integer_fields
self.summed_fields = self.integer_fields + ['MOTP_sum']
self.summary_fields = main_float_fields + main_integer_fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates CLEAR metrics for one sequence"""
# Initialise results
res = {}
for field in self.fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['CLR_FN'] = data['num_gt_dets']
res['ML'] = data['num_gt_ids']
res['MLR'] = 1.0
return res
if data['num_gt_dets'] == 0:
res['CLR_FP'] = data['num_tracker_dets']
res['MLR'] = 1.0
return res
# Variables counting global association
num_gt_ids = data['num_gt_ids']
gt_id_count = np.zeros(num_gt_ids) # For MT/ML/PT
gt_matched_count = np.zeros(num_gt_ids) # For MT/ML/PT
gt_frag_count = np.zeros(num_gt_ids) # For Frag
# Note that IDSWs are counted based on the last time each gt_id was present (any number of frames previously),
# but are only used in matching to continue current tracks based on the gt_id in the single previous timestep.
prev_tracker_id = np.nan * np.zeros(num_gt_ids) # For scoring IDSW
prev_timestep_tracker_id = np.nan * np.zeros(num_gt_ids) # For matching IDSW
# Calculate scores for each timestep
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Deal with the case that there are no gt_det/tracker_det in a timestep.
if len(gt_ids_t) == 0:
res['CLR_FP'] += len(tracker_ids_t)
continue
if len(tracker_ids_t) == 0:
res['CLR_FN'] += len(gt_ids_t)
gt_id_count[gt_ids_t] += 1
continue
# Calc score matrix to first minimise IDSWs from previous frame, and then maximise MOTP secondarily
similarity = data['similarity_scores'][t]
score_mat = (tracker_ids_t[np.newaxis, :] == prev_timestep_tracker_id[gt_ids_t[:, np.newaxis]])
score_mat = 1000 * score_mat + similarity
score_mat[similarity < self.threshold - np.finfo('float').eps] = 0
# Hungarian algorithm to find best matches
match_rows, match_cols = linear_sum_assignment(-score_mat)
actually_matched_mask = score_mat[match_rows, match_cols] > 0 + np.finfo('float').eps
match_rows = match_rows[actually_matched_mask]
match_cols = match_cols[actually_matched_mask]
matched_gt_ids = gt_ids_t[match_rows]
matched_tracker_ids = tracker_ids_t[match_cols]
# Calc IDSW for MOTA
prev_matched_tracker_ids = prev_tracker_id[matched_gt_ids]
is_idsw = (np.logical_not(np.isnan(prev_matched_tracker_ids))) & (
np.not_equal(matched_tracker_ids, prev_matched_tracker_ids))
res['IDSW'] += np.sum(is_idsw)
# Update counters for MT/ML/PT/Frag and record for IDSW/Frag for next timestep
gt_id_count[gt_ids_t] += 1
gt_matched_count[matched_gt_ids] += 1
not_previously_tracked = np.isnan(prev_timestep_tracker_id)
prev_tracker_id[matched_gt_ids] = matched_tracker_ids
prev_timestep_tracker_id[:] = np.nan
prev_timestep_tracker_id[matched_gt_ids] = matched_tracker_ids
currently_tracked = np.logical_not(np.isnan(prev_timestep_tracker_id))
gt_frag_count += np.logical_and(not_previously_tracked, currently_tracked)
# Calculate and accumulate basic statistics
num_matches = len(matched_gt_ids)
res['CLR_TP'] += num_matches
res['CLR_FN'] += len(gt_ids_t) - num_matches
res['CLR_FP'] += len(tracker_ids_t) - num_matches
if num_matches > 0:
res['MOTP_sum'] += sum(similarity[match_rows, match_cols])
# Calculate MT/ML/PT/Frag/MOTP
tracked_ratio = gt_matched_count[gt_id_count > 0] / gt_id_count[gt_id_count > 0]
res['MT'] = np.sum(np.greater(tracked_ratio, 0.8))
res['PT'] = np.sum(np.greater_equal(tracked_ratio, 0.2)) - res['MT']
res['ML'] = num_gt_ids - res['MT'] - res['PT']
res['Frag'] = np.sum(np.subtract(gt_frag_count[gt_frag_count > 0], 1))
res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
res['CLR_Frames'] = data['num_timesteps']
# Calculate final CLEAR scores
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.summed_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.summed_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_fields:
if ignore_empty_classes:
res[field] = self._combine_sum(
{k: v for k, v in all_res.items() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0}, field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean(
[v[field] for v in all_res.values() if v['CLR_TP'] + v['CLR_FN'] + v['CLR_FP'] > 0], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
num_gt_ids = res['MT'] + res['ML'] + res['PT']
res['MTR'] = res['MT'] / np.maximum(1.0, num_gt_ids)
res['MLR'] = res['ML'] / np.maximum(1.0, num_gt_ids)
res['PTR'] = res['PT'] / np.maximum(1.0, num_gt_ids)
res['CLR_Re'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['CLR_Pr'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + res['CLR_FP'])
res['MODA'] = (res['CLR_TP'] - res['CLR_FP']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['MOTA'] = (res['CLR_TP'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['MOTP'] = res['MOTP_sum'] / np.maximum(1.0, res['CLR_TP'])
res['sMOTA'] = (res['MOTP_sum'] - res['CLR_FP'] - res['IDSW']) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
res['CLR_F1'] = res['CLR_TP'] / np.maximum(1.0, res['CLR_TP'] + 0.5*res['CLR_FN'] + 0.5*res['CLR_FP'])
res['FP_per_frame'] = res['CLR_FP'] / np.maximum(1.0, res['CLR_Frames'])
safe_log_idsw = np.log10(res['IDSW']) if res['IDSW'] > 0 else res['IDSW']
res['MOTAL'] = (res['CLR_TP'] - res['CLR_FP'] - safe_log_idsw) / np.maximum(1.0, res['CLR_TP'] + res['CLR_FN'])
return res
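# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# _compute_final_fields() only needs the summed sub-metric counts; the numbers are invented.
_clear_example = CLEAR._compute_final_fields({'CLR_TP': 80, 'CLR_FN': 20, 'CLR_FP': 10, 'IDSW': 5,
                                               'MT': 6, 'PT': 3, 'ML': 1, 'Frag': 4,
                                               'MOTP_sum': 64.0, 'CLR_Frames': 50})
# e.g. MOTA = (80 - 10 - 5) / 100 = 0.65, MOTP = 64 / 80 = 0.8, CLR_Re = 0.8, CLR_Pr = 80 / 90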

View File

@@ -0,0 +1,44 @@
from ._base_metric import _BaseMetric
from .. import _timing
class Count(_BaseMetric):
"""Class which simply counts the number of tracker and gt detections and ids."""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['Dets', 'GT_Dets', 'IDs', 'GT_IDs']
self.fields = self.integer_fields
self.summary_fields = self.fields
@_timing.time
def eval_sequence(self, data):
"""Returns counts for one sequence"""
# Get results
res = {'Dets': data['num_tracker_dets'],
'GT_Dets': data['num_gt_dets'],
'IDs': data['num_tracker_ids'],
'GT_IDs': data['num_gt_ids'],
'Frames': data['num_timesteps']}
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=None):
"""Combines metrics across all classes by averaging over the class values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
return res

View File

@@ -0,0 +1,203 @@
import os
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
class HOTA(_BaseMetric):
"""Class which implements the HOTA metrics.
See: https://link.springer.com/article/10.1007/s11263-020-01375-2
"""
def __init__(self, config=None):
super().__init__()
self.plottable = True
self.array_labels = np.arange(0.05, 0.99, 0.05)
self.integer_array_fields = ['HOTA_TP', 'HOTA_FN', 'HOTA_FP']
self.float_array_fields = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA']
self.float_fields = ['HOTA(0)', 'LocA(0)', 'HOTALocA(0)']
self.fields = self.float_array_fields + self.integer_array_fields + self.float_fields
self.summary_fields = self.float_array_fields + self.float_fields
@_timing.time
def eval_sequence(self, data):
"""Calculates the HOTA metrics for one sequence"""
# Initialise results
res = {}
for field in self.float_array_fields + self.integer_array_fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
for field in self.float_fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['HOTA_FN'] = data['num_gt_dets'] * np.ones((len(self.array_labels)), dtype=float)
res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
res['LocA(0)'] = 1.0
return res
if data['num_gt_dets'] == 0:
res['HOTA_FP'] = data['num_tracker_dets'] * np.ones((len(self.array_labels)), dtype=float)
res['LocA'] = np.ones((len(self.array_labels)), dtype=float)
res['LocA(0)'] = 1.0
return res
# Variables counting global association
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros((data['num_gt_ids'], 1))
tracker_id_count = np.zeros((1, data['num_tracker_ids']))
# First loop through each timestep and accumulate global track information.
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the potential matches between ids in each timestep
# These are normalised, weighted by the match similarity.
similarity = data['similarity_scores'][t]
sim_iou_denom = similarity.sum(0)[np.newaxis, :] + similarity.sum(1)[:, np.newaxis] - similarity
sim_iou = np.zeros_like(similarity)
sim_iou_mask = sim_iou_denom > 0 + np.finfo('float').eps
sim_iou[sim_iou_mask] = similarity[sim_iou_mask] / sim_iou_denom[sim_iou_mask]
potential_matches_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += sim_iou
# Calculate the total number of dets for each gt_id and tracker_id.
gt_id_count[gt_ids_t] += 1
tracker_id_count[0, tracker_ids_t] += 1
# Calculate overall jaccard alignment score (before unique matching) between IDs
global_alignment_score = potential_matches_count / (gt_id_count + tracker_id_count - potential_matches_count)
matches_counts = [np.zeros_like(potential_matches_count) for _ in self.array_labels]
# Calculate scores for each timestep
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Deal with the case that there are no gt_det/tracker_det in a timestep.
if len(gt_ids_t) == 0:
for a, alpha in enumerate(self.array_labels):
res['HOTA_FP'][a] += len(tracker_ids_t)
continue
if len(tracker_ids_t) == 0:
for a, alpha in enumerate(self.array_labels):
res['HOTA_FN'][a] += len(gt_ids_t)
continue
# Get matching scores between pairs of dets for optimizing HOTA
similarity = data['similarity_scores'][t]
score_mat = global_alignment_score[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] * similarity
# Hungarian algorithm to find best matches
match_rows, match_cols = linear_sum_assignment(-score_mat)
# Calculate and accumulate basic statistics
for a, alpha in enumerate(self.array_labels):
actually_matched_mask = similarity[match_rows, match_cols] >= alpha - np.finfo('float').eps
alpha_match_rows = match_rows[actually_matched_mask]
alpha_match_cols = match_cols[actually_matched_mask]
num_matches = len(alpha_match_rows)
res['HOTA_TP'][a] += num_matches
res['HOTA_FN'][a] += len(gt_ids_t) - num_matches
res['HOTA_FP'][a] += len(tracker_ids_t) - num_matches
if num_matches > 0:
res['LocA'][a] += sum(similarity[alpha_match_rows, alpha_match_cols])
matches_counts[a][gt_ids_t[alpha_match_rows], tracker_ids_t[alpha_match_cols]] += 1
# Calculate association scores (AssA, AssRe, AssPr) for the alpha value.
# First calculate scores per gt_id/tracker_id combo and then average over the number of detections.
for a, alpha in enumerate(self.array_labels):
matches_count = matches_counts[a]
ass_a = matches_count / np.maximum(1, gt_id_count + tracker_id_count - matches_count)
res['AssA'][a] = np.sum(matches_count * ass_a) / np.maximum(1, res['HOTA_TP'][a])
ass_re = matches_count / np.maximum(1, gt_id_count)
res['AssRe'][a] = np.sum(matches_count * ass_re) / np.maximum(1, res['HOTA_TP'][a])
ass_pr = matches_count / np.maximum(1, tracker_id_count)
res['AssPr'][a] = np.sum(matches_count * ass_pr) / np.maximum(1, res['HOTA_TP'][a])
# Calculate final scores
res['LocA'] = np.maximum(1e-10, res['LocA']) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_array_fields:
res[field] = self._combine_sum(all_res, field)
for field in ['AssRe', 'AssPr', 'AssA']:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_array_fields:
if ignore_empty_classes:
res[field] = self._combine_sum(
{k: v for k, v in all_res.items()
if (v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()}, field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields + self.float_array_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values() if
(v['HOTA_TP'] + v['HOTA_FN'] + v['HOTA_FP'] > 0 + np.finfo('float').eps).any()],
axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_array_fields:
res[field] = self._combine_sum(all_res, field)
for field in ['AssRe', 'AssPr', 'AssA']:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='HOTA_TP')
loca_weighted_sum = sum([all_res[k]['LocA'] * all_res[k]['HOTA_TP'] for k in all_res.keys()])
res['LocA'] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(1e-10, res['HOTA_TP'])
res = self._compute_final_fields(res)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
res['DetRe'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'])
res['DetPr'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FP'])
res['DetA'] = res['HOTA_TP'] / np.maximum(1, res['HOTA_TP'] + res['HOTA_FN'] + res['HOTA_FP'])
res['HOTA'] = np.sqrt(res['DetA'] * res['AssA'])
res['OWTA'] = np.sqrt(res['DetRe'] * res['AssA'])
res['HOTA(0)'] = res['HOTA'][0]
res['LocA(0)'] = res['LocA'][0]
res['HOTALocA(0)'] = res['HOTA(0)']*res['LocA(0)']
return res
def plot_single_tracker_results(self, table_res, tracker, cls, output_folder):
"""Create plot of results"""
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
res = table_res['COMBINED_SEQ']
styles_to_plot = ['r', 'b', 'g', 'b--', 'b:', 'g--', 'g:', 'm']
for name, style in zip(self.float_array_fields, styles_to_plot):
plt.plot(self.array_labels, res[name], style)
plt.xlabel('alpha')
plt.ylabel('score')
plt.title(tracker + ' - ' + cls)
plt.axis([0, 1, 0, 1])
legend = []
for name in self.float_array_fields:
legend += [name + ' (' + str(np.round(np.mean(res[name]), 2)) + ')']
plt.legend(legend, loc='lower left')
out_file = os.path.join(output_folder, cls + '_plot.pdf')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
plt.savefig(out_file)
plt.savefig(out_file.replace('.pdf', '.png'))
plt.clf()
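# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# The relations computed in _compute_final_fields() at a single alpha, with invented counts.
_tp, _fn, _fp = 70.0, 30.0, 20.0
_det_a = _tp / (_tp + _fn + _fp)        # DetA = 70 / 120, roughly 0.583
_ass_a = 0.5                            # assume an association accuracy of 0.5 at this alpha
_hota_alpha = np.sqrt(_det_a * _ass_a)  # HOTA at this alpha = sqrt(DetA * AssA), roughly 0.540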

View File

@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from .. import utils
class Identity(_BaseMetric):
"""Class which implements the ID metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.5, # Similarity score threshold required for an IDTP match. Default 0.5.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['IDTP', 'IDFN', 'IDFP']
self.float_fields = ['IDF1', 'IDR', 'IDP']
self.fields = self.float_fields + self.integer_fields
self.summary_fields = self.fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates ID metrics for one sequence"""
# Initialise results
res = {}
for field in self.fields:
res[field] = 0
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0:
res['IDFN'] = data['num_gt_dets']
return res
if data['num_gt_dets'] == 0:
res['IDFP'] = data['num_tracker_dets']
return res
# Variables counting global association
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros(data['num_gt_ids'])
tracker_id_count = np.zeros(data['num_tracker_ids'])
# First loop through each timestep and accumulate global track information.
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the potential matches between ids in each timestep
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
# Calculate the total number of dets for each gt_id and tracker_id.
gt_id_count[gt_ids_t] += 1
tracker_id_count[tracker_ids_t] += 1
# Calculate optimal assignment cost matrix for ID metrics
num_gt_ids = data['num_gt_ids']
num_tracker_ids = data['num_tracker_ids']
fp_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
fn_mat = np.zeros((num_gt_ids + num_tracker_ids, num_gt_ids + num_tracker_ids))
fp_mat[num_gt_ids:, :num_tracker_ids] = 1e10
fn_mat[:num_gt_ids, num_tracker_ids:] = 1e10
for gt_id in range(num_gt_ids):
fn_mat[gt_id, :num_tracker_ids] = gt_id_count[gt_id]
fn_mat[gt_id, num_tracker_ids + gt_id] = gt_id_count[gt_id]
for tracker_id in range(num_tracker_ids):
fp_mat[:num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
fp_mat[tracker_id + num_gt_ids, tracker_id] = tracker_id_count[tracker_id]
fn_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count
fp_mat[:num_gt_ids, :num_tracker_ids] -= potential_matches_count
# Hungarian algorithm
match_rows, match_cols = linear_sum_assignment(fn_mat + fp_mat)
# Accumulate basic statistics
res['IDFN'] = fn_mat[match_rows, match_cols].sum().astype(int)
res['IDFP'] = fp_mat[match_rows, match_cols].sum().astype(int)
res['IDTP'] = (gt_id_count.sum() - res['IDFN']).astype(int)
# Calculate final ID scores
res = self._compute_final_fields(res)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.integer_fields:
if ignore_empty_classes:
res[field] = self._combine_sum({k: v for k, v in all_res.items()
if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps},
field)
else:
res[field] = self._combine_sum({k: v for k, v in all_res.items()}, field)
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['IDTP'] + v['IDFN'] + v['IDFP'] > 0 + np.finfo('float').eps], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.integer_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
@staticmethod
def _compute_final_fields(res):
"""Calculate sub-metric ('field') values which only depend on other sub-metric values.
This function is used both for per-sequence calculation and for combining values across sequences.
"""
res['IDR'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFN'])
res['IDP'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + res['IDFP'])
res['IDF1'] = res['IDTP'] / np.maximum(1.0, res['IDTP'] + 0.5 * res['IDFP'] + 0.5 * res['IDFN'])
return res
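# --- Illustrative worked example (added for clarity; not part of the upstream file) ---
# IDF1 from invented counts, using the static helper above.
_id_example = Identity._compute_final_fields({'IDTP': 80, 'IDFN': 30, 'IDFP': 20})
# IDR = 80 / 110, IDP = 80 / 100, IDF1 = 80 / (80 + 0.5 * 20 + 0.5 * 30), roughly 0.762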

View File

@@ -0,0 +1,135 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
from collections import defaultdict
from .. import utils
class IDEucl(_BaseMetric):
"""Class which implements the ID metrics"""
@staticmethod
def get_default_config():
"""Default class config values"""
default_config = {
'THRESHOLD': 0.4, # Similarity score threshold required for an IDTP match. Default: 0.4 for IDEucl.
'PRINT_CONFIG': True, # Whether to print the config information on init. Default: True.
}
return default_config
def __init__(self, config=None):
super().__init__()
self.fields = ['IDEucl']
self.float_fields = self.fields
self.summary_fields = self.fields
# Configuration options:
self.config = utils.init_config(config, self.get_default_config(), self.get_name())
self.threshold = float(self.config['THRESHOLD'])
@_timing.time
def eval_sequence(self, data):
"""Calculates IDEucl metrics for all frames"""
# Initialise results
res = {'IDEucl' : 0}
# Return result quickly if tracker or gt sequence is empty
if data['num_tracker_dets'] == 0 or data['num_gt_dets'] == 0.:
return res
data['centroid'] = []
for t, gt_det in enumerate(data['gt_dets']):
data['centroid'].append(self._compute_centroid(gt_det))
oid_hid_cent = defaultdict(list)
oid_cent = defaultdict(list)
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
# Assumes the order of ids and boxes is consistent within `data`
for ind, gid in enumerate(gt_ids_t):
oid_cent[gid].append(data['centroid'][t][ind])
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
for m_gid, m_tid in zip(match_idx_gt, match_idx_tracker):
oid_hid_cent[gt_ids_t[m_gid], tracker_ids_t[m_tid]].append(data['centroid'][t][m_gid])
oid_hid_dist = {k : np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_hid_cent.items()}
oid_dist = {int(k) : np.sum(np.linalg.norm(np.diff(np.array(v), axis=0), axis=1)) for k, v in oid_cent.items()}
unique_oid = np.unique([i[0] for i in oid_hid_dist.keys()]).tolist()
unique_hid = np.unique([i[1] for i in oid_hid_dist.keys()]).tolist()
o_len = len(unique_oid)
h_len = len(unique_hid)
dist_matrix = np.zeros((o_len, h_len))
for ((oid, hid), dist) in oid_hid_dist.items():
oid_ind = unique_oid.index(oid)
hid_ind = unique_hid.index(hid)
dist_matrix[oid_ind, hid_ind] = dist
# opt_hyp_dist contains GT ID : max dist covered by track
opt_hyp_dist = dict.fromkeys(oid_dist.keys(), 0.)
cost_matrix = np.max(dist_matrix) - dist_matrix
rows, cols = linear_sum_assignment(cost_matrix)
for (row, col) in zip(rows, cols):
value = dist_matrix[row, col]
opt_hyp_dist[int(unique_oid[row])] = value
assert len(opt_hyp_dist.keys()) == len(oid_dist.keys())
hyp_length = np.sum(list(opt_hyp_dist.values()))
gt_length = np.sum(list(oid_dist.values()))
id_eucl = np.mean([np.divide(a, b, out=np.zeros_like(a), where=b != 0) for a, b in zip(opt_hyp_dist.values(), oid_dist.values())])  # per-track ratios (currently unused)
res['IDEucl'] = np.divide(hyp_length, gt_length, out=np.zeros_like(hyp_length), where=gt_length!=0)
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.float_fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['IDEucl'] > 0 + np.finfo('float').eps], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.float_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res, len(all_res))
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for field in self.float_fields:
res[field] = self._combine_sum(all_res, field)
res = self._compute_final_fields(res, len(all_res))
return res
@staticmethod
def _compute_centroid(box):
box = np.array(box)
if len(box.shape) == 1:
centroid = (box[0:2] + box[2:4])/2
else:
centroid = (box[:, 0:2] + box[:, 2:4])/2
return np.flip(centroid, axis=1)
@staticmethod
def _compute_final_fields(res, res_len):
"""
Exists only to match the signature of the original Identity class.
"""
return {k:v/res_len for k,v in res.items()}
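# --- Illustrative sketch (added for clarity; not part of the upstream file) ---
# _compute_centroid() appears to assume corner-format [x1, y1, x2, y2] boxes (one row per
# detection) and returns centroids flipped to (y, x) order; the box below is invented.
_boxes_example = np.array([[10., 20., 30., 60.]])
_centroids_example = IDEucl._compute_centroid(_boxes_example)  # -> array([[40., 20.]])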

View File

@@ -0,0 +1,310 @@
import numpy as np
import math
from scipy.optimize import linear_sum_assignment
from ..utils import TrackEvalException
from ._base_metric import _BaseMetric
from .. import _timing
class JAndF(_BaseMetric):
"""Class which implements the J&F metrics"""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['num_gt_tracks']
self.float_fields = ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay', 'J&F']
self.fields = self.float_fields + self.integer_fields
self.summary_fields = self.float_fields
self.optim_type = 'J' # possible values J, J&F
@_timing.time
def eval_sequence(self, data):
"""Returns J&F metrics for one sequence"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
num_timesteps = data['num_timesteps']
num_tracker_ids = data['num_tracker_ids']
num_gt_ids = data['num_gt_ids']
gt_dets = data['gt_dets']
tracker_dets = data['tracker_dets']
gt_ids = data['gt_ids']
tracker_ids = data['tracker_ids']
# get shape of frames
frame_shape = None
if num_gt_ids > 0:
for t in range(num_timesteps):
if len(gt_ids[t]) > 0:
frame_shape = gt_dets[t][0]['size']
break
elif num_tracker_ids > 0:
for t in range(num_timesteps):
if len(tracker_ids[t]) > 0:
frame_shape = tracker_dets[t][0]['size']
break
if frame_shape:
# append all zero masks for timesteps in which tracks do not have a detection
zero_padding = np.zeros(frame_shape, order='F').astype(np.uint8)
padding_mask = mask_utils.encode(zero_padding)
for t in range(num_timesteps):
gt_id_det_mapping = {gt_ids[t][i]: gt_dets[t][i] for i in range(len(gt_ids[t]))}
gt_dets[t] = [gt_id_det_mapping[index] if index in gt_ids[t] else padding_mask for index
in range(num_gt_ids)]
tracker_id_det_mapping = {tracker_ids[t][i]: tracker_dets[t][i] for i in range(len(tracker_ids[t]))}
tracker_dets[t] = [tracker_id_det_mapping[index] if index in tracker_ids[t] else padding_mask for index
in range(num_tracker_ids)]
# also perform zero padding if number of tracker IDs < number of ground truth IDs
if num_tracker_ids < num_gt_ids:
diff = num_gt_ids - num_tracker_ids
for t in range(num_timesteps):
tracker_dets[t] = tracker_dets[t] + [padding_mask for _ in range(diff)]
num_tracker_ids += diff
j = self._compute_j(gt_dets, tracker_dets, num_gt_ids, num_tracker_ids, num_timesteps)
# boundary threshold for F computation
bound_th = 0.008
# perform matching
if self.optim_type == 'J&F':
f = np.zeros_like(j)
for k in range(num_tracker_ids):
for i in range(num_gt_ids):
f[k, i, :] = self._compute_f(gt_dets, tracker_dets, k, i, bound_th)
optim_metrics = (np.mean(j, axis=2) + np.mean(f, axis=2)) / 2
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
j_m = j[row_ind, col_ind, :]
f_m = f[row_ind, col_ind, :]
elif self.optim_type == 'J':
optim_metrics = np.mean(j, axis=2)
row_ind, col_ind = linear_sum_assignment(- optim_metrics)
j_m = j[row_ind, col_ind, :]
f_m = np.zeros_like(j_m)
for i, (tr_ind, gt_ind) in enumerate(zip(row_ind, col_ind)):
f_m[i] = self._compute_f(gt_dets, tracker_dets, tr_ind, gt_ind, bound_th)
else:
raise TrackEvalException('Unsupported optimization type %s for J&F metric.' % self.optim_type)
# append zeros for false negatives
if j_m.shape[0] < data['num_gt_ids']:
diff = data['num_gt_ids'] - j_m.shape[0]
j_m = np.concatenate((j_m, np.zeros((diff, j_m.shape[1]))), axis=0)
f_m = np.concatenate((f_m, np.zeros((diff, f_m.shape[1]))), axis=0)
# compute the metrics for each ground truth track
res = {
'J-Mean': [np.nanmean(j_m[i, :]) for i in range(j_m.shape[0])],
'J-Recall': [np.nanmean(j_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(j_m.shape[0])],
'F-Mean': [np.nanmean(f_m[i, :]) for i in range(f_m.shape[0])],
'F-Recall': [np.nanmean(f_m[i, :] > 0.5 + np.finfo('float').eps) for i in range(f_m.shape[0])],
'J-Decay': [],
'F-Decay': []
}
n_bins = 4
ids = np.round(np.linspace(1, data['num_timesteps'], n_bins + 1) + 1e-10) - 1
ids = ids.astype(np.uint8)
for k in range(j_m.shape[0]):
d_bins_j = [j_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
res['J-Decay'].append(np.nanmean(d_bins_j[0]) - np.nanmean(d_bins_j[3]))
for k in range(f_m.shape[0]):
d_bins_f = [f_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)]
res['F-Decay'].append(np.nanmean(d_bins_f[0]) - np.nanmean(d_bins_f[3]))
# count number of tracks for weighting of the result
res['num_gt_tracks'] = len(res['J-Mean'])
for field in ['J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay']:
res[field] = np.mean(res[field])
res['J&F'] = (res['J-Mean'] + res['F-Mean']) / 2
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.summary_fields:
res[field] = self._combine_weighted_av(all_res, field, res, weight_field='num_gt_tracks')
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
"""Combines metrics across all classes by averaging over the class values
'ignore empty classes' is not yet implemented here.
"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.float_fields:
res[field] = np.mean([v[field] for v in all_res.values()])
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {'num_gt_tracks': self._combine_sum(all_res, 'num_gt_tracks')}
for field in self.float_fields:
res[field] = np.mean([v[field] for v in all_res.values()])
return res
@staticmethod
def _seg2bmap(seg, width=None, height=None):
"""
From a segmentation, compute a binary boundary map with 1 pixel wide
boundaries. The boundary pixels are offset by 1/2 pixel towards the
origin from the actual segment boundary.
Arguments:
seg : Segments labeled from 1..k.
width : Width of desired bmap <= seg.shape[1]
height : Height of desired bmap <= seg.shape[0]
Returns:
bmap (ndarray): Binary boundary map.
David Martin <dmartin@eecs.berkeley.edu>
January 2003
"""
seg = seg.astype(bool)
seg[seg > 0] = 1
assert np.atleast_3d(seg).shape[2] == 1
width = seg.shape[1] if width is None else width
height = seg.shape[0] if height is None else height
h, w = seg.shape[:2]
ar1 = float(width) / float(height)
ar2 = float(w) / float(h)
assert not (
width > w or height > h or abs(ar1 - ar2) > 0.01
), "Can't convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
e = np.zeros_like(seg)
s = np.zeros_like(seg)
se = np.zeros_like(seg)
e[:, :-1] = seg[:, 1:]
s[:-1, :] = seg[1:, :]
se[:-1, :-1] = seg[1:, 1:]
b = seg ^ e | seg ^ s | seg ^ se
b[-1, :] = seg[-1, :] ^ e[-1, :]
b[:, -1] = seg[:, -1] ^ s[:, -1]
b[-1, -1] = 0
if w == width and h == height:
bmap = b
else:
bmap = np.zeros((height, width))
for x in range(w):
for y in range(h):
if b[y, x]:
j = 1 + math.floor((y - 1) + height / h)
i = 1 + math.floor((x - 1) + width / h)
bmap[j, i] = 1
return bmap
@staticmethod
def _compute_f(gt_data, tracker_data, tracker_data_id, gt_id, bound_th):
"""
Perform F computation for a given gt and a given tracker ID. Adapted from
https://github.com/davisvideochallenge/davis2017-evaluation
:param gt_data: the encoded gt masks
:param tracker_data: the encoded tracker masks
:param tracker_data_id: the tracker ID
:param gt_id: the ground truth ID
:param bound_th: boundary threshold parameter
:return: the F value for the given tracker and gt ID
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
from skimage.morphology import disk
import cv2
f = np.zeros(len(gt_data))
for t, (gt_masks, tracker_masks) in enumerate(zip(gt_data, tracker_data)):
curr_tracker_mask = mask_utils.decode(tracker_masks[tracker_data_id])
curr_gt_mask = mask_utils.decode(gt_masks[gt_id])
bound_pix = bound_th if bound_th >= 1 - np.finfo('float').eps else \
np.ceil(bound_th * np.linalg.norm(curr_tracker_mask.shape))
# Get the pixel boundaries of both masks
fg_boundary = JAndF._seg2bmap(curr_tracker_mask)
gt_boundary = JAndF._seg2bmap(curr_gt_mask)
# fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
fg_dil = cv2.dilate(fg_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
# gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
gt_dil = cv2.dilate(gt_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
# Get the intersection
gt_match = gt_boundary * fg_dil
fg_match = fg_boundary * gt_dil
# Area of the intersection
n_fg = np.sum(fg_boundary)
n_gt = np.sum(gt_boundary)
# % Compute precision and recall
if n_fg == 0 and n_gt > 0:
precision = 1
recall = 0
elif n_fg > 0 and n_gt == 0:
precision = 0
recall = 1
elif n_fg == 0 and n_gt == 0:
precision = 1
recall = 1
else:
precision = np.sum(fg_match) / float(n_fg)
recall = np.sum(gt_match) / float(n_gt)
# Compute F measure
if precision + recall == 0:
f_val = 0
else:
f_val = 2 * precision * recall / (precision + recall)
f[t] = f_val
return f
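# Worked example (hypothetical numbers): with 40 boundary pixels in the tracker mask,
# 50 in the gt mask, 30 tracker-boundary pixels falling inside the dilated gt boundary
# and 35 gt-boundary pixels inside the dilated tracker boundary, precision = 30/40 = 0.75,
# recall = 35/50 = 0.70 and F = 2 * 0.75 * 0.70 / (0.75 + 0.70) ≈ 0.72.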
@staticmethod
def _compute_j(gt_data, tracker_data, num_gt_ids, num_tracker_ids, num_timesteps):
"""
Computation of J value for all ground truth IDs and all tracker IDs in the given sequence. Adapted from
https://github.com/davisvideochallenge/davis2017-evaluation
:param gt_data: the ground truth masks
:param tracker_data: the tracker masks
:param num_gt_ids: the number of ground truth IDs
:param num_tracker_ids: the number of tracker IDs
:param num_timesteps: the number of timesteps
:return: the J values
"""
# Only loaded when run to reduce minimum requirements
from pycocotools import mask as mask_utils
j = np.zeros((num_tracker_ids, num_gt_ids, num_timesteps))
for t, (time_gt, time_data) in enumerate(zip(gt_data, tracker_data)):
# run length encoded masks with pycocotools
area_gt = mask_utils.area(time_gt)
time_data = list(time_data)
area_tr = mask_utils.area(time_data)
area_tr = np.repeat(area_tr[:, np.newaxis], len(area_gt), axis=1)
area_gt = np.repeat(area_gt[np.newaxis, :], len(area_tr), axis=0)
# mask iou computation with pycocotools
ious = np.atleast_2d(mask_utils.iou(time_data, time_gt, [0]*len(time_gt)))
# set iou to 1 if both masks are close to 0 (no ground truth and no predicted mask in timestep)
ious[np.isclose(area_tr, 0) & np.isclose(area_gt, 0)] = 1
assert (ious >= 0 - np.finfo('float').eps).all()
assert (ious <= 1 + np.finfo('float').eps).all()
j[..., t] = ious
return j
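# Illustrative note on the assumed inputs (inferred from the pycocotools calls above):
# gt_data and tracker_data are lists over timesteps, each holding the COCO RLE-encoded
# masks of one frame, e.g.
#   rle = mask_utils.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
# The returned array j[tracker_id, gt_id, t] is the mask IoU of that pair at timestep t,
# set to 1 when both masks are empty.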

View File

@@ -0,0 +1,462 @@
import numpy as np
from ._base_metric import _BaseMetric
from .. import _timing
from functools import partial
from .. import utils
from ..utils import TrackEvalException
class TrackMAP(_BaseMetric):
"""Class which implements the TrackMAP metrics"""
@staticmethod
def get_default_metric_config():
"""Default class config values"""
default_config = {
'USE_AREA_RANGES': True, # whether to evaluate for certain area ranges
'AREA_RANGES': [[0 ** 2, 32 ** 2], # additional area range sets for which TrackMAP is evaluated
[32 ** 2, 96 ** 2], # (all area range always included), default values for TAO
[96 ** 2, 1e5 ** 2]], # evaluation
'AREA_RANGE_LABELS': ["area_s", "area_m", "area_l"], # the labels for the area ranges
'USE_TIME_RANGES': True, # whether to evaluate for certain time ranges (length of tracks)
'TIME_RANGES': [[0, 3], [3, 10], [10, 1e5]], # additional time range sets for which TrackMAP is evaluated
# (all time range always included) , default values for TAO evaluation
'TIME_RANGE_LABELS': ["time_s", "time_m", "time_l"], # the labels for the time ranges
'IOU_THRESHOLDS': np.arange(0.5, 0.96, 0.05), # the IoU thresholds
'RECALL_THRESHOLDS': np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01) + 1), endpoint=True),
# recall thresholds at which precision is evaluated
'MAX_DETECTIONS': 0, # limit the maximum number of considered tracks per sequence (0 for unlimited)
'PRINT_CONFIG': True
}
return default_config
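# Minimal usage sketch (keys as above, values hypothetical): passing e.g.
#   TrackMAP(config={'USE_TIME_RANGES': False, 'MAX_DETECTIONS': 50})
# keeps the default area-range splits, drops the time-range splits and caps the number
# of scored tracks per sequence at 50; unspecified keys fall back to the defaults.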
def __init__(self, config=None):
super().__init__()
self.config = utils.init_config(config, self.get_default_metric_config(), self.get_name())
self.num_ig_masks = 1
self.lbls = ['all']
self.use_area_rngs = self.config['USE_AREA_RANGES']
if self.use_area_rngs:
self.area_rngs = self.config['AREA_RANGES']
self.area_rng_lbls = self.config['AREA_RANGE_LABELS']
self.num_ig_masks += len(self.area_rng_lbls)
self.lbls += self.area_rng_lbls
self.use_time_rngs = self.config['USE_TIME_RANGES']
if self.use_time_rngs:
self.time_rngs = self.config['TIME_RANGES']
self.time_rng_lbls = self.config['TIME_RANGE_LABELS']
self.num_ig_masks += len(self.time_rng_lbls)
self.lbls += self.time_rng_lbls
self.array_labels = self.config['IOU_THRESHOLDS']
self.rec_thrs = self.config['RECALL_THRESHOLDS']
self.maxDet = self.config['MAX_DETECTIONS']
self.float_array_fields = ['AP_' + lbl for lbl in self.lbls] + ['AR_' + lbl for lbl in self.lbls]
self.fields = self.float_array_fields
self.summary_fields = self.float_array_fields
@_timing.time
def eval_sequence(self, data):
"""Calculates GT and Tracker matches for one sequence for TrackMAP metrics. Adapted from
https://github.com/TAO-Dataset/"""
# Initialise results to zero for each sequence as the fields are only defined over the set of all sequences
res = {}
for field in self.fields:
res[field] = [0 for _ in self.array_labels]
gt_ids, dt_ids = data['gt_track_ids'], data['dt_track_ids']
if len(gt_ids) == 0 and len(dt_ids) == 0:
for idx in range(self.num_ig_masks):
res[idx] = None
return res
# get track data
gt_tr_areas = data.get('gt_track_areas', None) if self.use_area_rngs else None
gt_tr_lengths = data.get('gt_track_lengths', None) if self.use_time_rngs else None
gt_tr_iscrowd = data.get('gt_track_iscrowd', None)
dt_tr_areas = data.get('dt_track_areas', None) if self.use_area_rngs else None
dt_tr_lengths = data.get('dt_track_lengths', None) if self.use_time_rngs else None
is_nel = data.get('not_exhaustively_labeled', False)
# compute ignore masks for different track sets to eval
gt_ig_masks = self._compute_track_ig_masks(len(gt_ids), track_lengths=gt_tr_lengths, track_areas=gt_tr_areas,
iscrowd=gt_tr_iscrowd)
dt_ig_masks = self._compute_track_ig_masks(len(dt_ids), track_lengths=dt_tr_lengths, track_areas=dt_tr_areas,
is_not_exhaustively_labeled=is_nel, is_gt=False)
boxformat = data.get('boxformat', 'xywh')
ious = self._compute_track_ious(data['dt_tracks'], data['gt_tracks'], iou_function=data['iou_type'],
boxformat=boxformat)
for mask_idx in range(self.num_ig_masks):
gt_ig_mask = gt_ig_masks[mask_idx]
# Sort gt ignore last
gt_idx = np.argsort([g for g in gt_ig_mask], kind="mergesort")
gt_ids = [gt_ids[i] for i in gt_idx]
ious_sorted = ious[:, gt_idx] if len(ious) > 0 else ious
num_thrs = len(self.array_labels)
num_gt = len(gt_ids)
num_dt = len(dt_ids)
# Array to store the "id" of the matched dt/gt
gt_m = np.zeros((num_thrs, num_gt)) - 1
dt_m = np.zeros((num_thrs, num_dt)) - 1
gt_ig = np.array([gt_ig_mask[idx] for idx in gt_idx])
dt_ig = np.zeros((num_thrs, num_dt))
for iou_thr_idx, iou_thr in enumerate(self.array_labels):
if len(ious_sorted) == 0:
break
for dt_idx, _dt in enumerate(dt_ids):
iou = min([iou_thr, 1 - 1e-10])
# information about best match so far (m=-1 -> unmatched)
# store the gt_idx which matched for _dt
m = -1
for gt_idx, _ in enumerate(gt_ids):
# if this gt already matched continue
if gt_m[iou_thr_idx, gt_idx] > 0:
continue
# if _dt already matched to a regular gt and only ignore gts remain, stop
if m > -1 and gt_ig[m] == 0 and gt_ig[gt_idx] == 1:
break
# continue to next gt unless better match made
if ious_sorted[dt_idx, gt_idx] < iou - np.finfo('float').eps:
continue
# if match successful and best so far, store appropriately
iou = ious_sorted[dt_idx, gt_idx]
m = gt_idx
# No match found for _dt, go to next _dt
if m == -1:
continue
# if the matched gt is an ignore gt, propagate the ignore flag to dt_ig
# so that this detection is not used in evaluation.
dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
# _dt match found, update gt_m, and dt_m with "id"
dt_m[iou_thr_idx, dt_idx] = gt_ids[m]
gt_m[iou_thr_idx, m] = _dt
dt_ig_mask = dt_ig_masks[mask_idx]
dt_ig_mask = np.array(dt_ig_mask).reshape((1, num_dt)) # 1 X num_dt
dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0) # num_thrs X num_dt
# Based on dt_ig_mask ignore any unmatched detection by updating dt_ig
dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == -1, dt_ig_mask))
# store results for given video and category
res[mask_idx] = {
"dt_ids": dt_ids,
"gt_ids": gt_ids,
"dt_matches": dt_m,
"gt_matches": gt_m,
"dt_scores": data['dt_track_scores'],
"gt_ignore": gt_ig,
"dt_ignore": dt_ig,
}
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences. Computes precision and recall values based on track matches.
Adapted from https://github.com/TAO-Dataset/
"""
num_thrs = len(self.array_labels)
num_recalls = len(self.rec_thrs)
# -1 for absent categories
precision = -np.ones(
(num_thrs, num_recalls, self.num_ig_masks)
)
recall = -np.ones((num_thrs, self.num_ig_masks))
for ig_idx in range(self.num_ig_masks):
ig_idx_results = [res[ig_idx] for res in all_res.values() if res[ig_idx] is not None]
# Remove elements which are None
if len(ig_idx_results) == 0:
continue
# Append all scores: shape (N,)
# limit considered tracks for each sequence if maxDet > 0
if self.maxDet == 0:
dt_scores = np.concatenate([res["dt_scores"] for res in ig_idx_results], axis=0)
dt_idx = np.argsort(-dt_scores, kind="mergesort")
dt_m = np.concatenate([e["dt_matches"] for e in ig_idx_results],
axis=1)[:, dt_idx]
dt_ig = np.concatenate([e["dt_ignore"] for e in ig_idx_results],
axis=1)[:, dt_idx]
elif self.maxDet > 0:
dt_scores = np.concatenate([res["dt_scores"][0:self.maxDet] for res in ig_idx_results], axis=0)
dt_idx = np.argsort(-dt_scores, kind="mergesort")
dt_m = np.concatenate([e["dt_matches"][:, 0:self.maxDet] for e in ig_idx_results],
axis=1)[:, dt_idx]
dt_ig = np.concatenate([e["dt_ignore"][:, 0:self.maxDet] for e in ig_idx_results],
axis=1)[:, dt_idx]
else:
raise Exception("Number of maximum detections must be >= 0, but is set to %i" % self.maxDet)
gt_ig = np.concatenate([res["gt_ignore"] for res in ig_idx_results])
# num gt anns to consider
num_gt = np.count_nonzero(gt_ig == 0)
if num_gt == 0:
continue
tps = np.logical_and(dt_m != -1, np.logical_not(dt_ig))
fps = np.logical_and(dt_m == -1, np.logical_not(dt_ig))
tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
tp = np.array(tp)
fp = np.array(fp)
num_tp = len(tp)
rc = tp / num_gt
if num_tp:
recall[iou_thr_idx, ig_idx] = rc[-1]
else:
recall[iou_thr_idx, ig_idx] = 0
# np.spacing(1) ~= eps
pr = tp / (fp + tp + np.spacing(1))
pr = pr.tolist()
# Ensure precision values are monotonically decreasing
for i in range(num_tp - 1, 0, -1):
if pr[i] > pr[i - 1]:
pr[i - 1] = pr[i]
# find indices at the predefined recall values
rec_thrs_insert_idx = np.searchsorted(rc, self.rec_thrs, side="left")
pr_at_recall = [0.0] * num_recalls
try:
for _idx, pr_idx in enumerate(rec_thrs_insert_idx):
pr_at_recall[_idx] = pr[pr_idx]
except IndexError:
pass
precision[iou_thr_idx, :, ig_idx] = (np.array(pr_at_recall))
res = {'precision': precision, 'recall': recall}
# compute the precision and recall averages for the respective alpha thresholds and ignore masks
for lbl in self.lbls:
res['AP_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
res['AR_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
for a_id, alpha in enumerate(self.array_labels):
for lbl_idx, lbl in enumerate(self.lbls):
p = precision[a_id, :, lbl_idx]
if len(p[p > -1]) == 0:
mean_p = -1
else:
mean_p = np.mean(p[p > -1])
res['AP_' + lbl][a_id] = mean_p
res['AR_' + lbl][a_id] = recall[a_id, lbl_idx]
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
"""Combines metrics across all classes by averaging over the class values
Note mAP is not well defined for 'empty classes' so 'ignore empty classes' is always true here.
"""
res = {}
for field in self.fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
field_stacked = np.array([res[field] for res in all_res.values()])
for a_id, alpha in enumerate(self.array_labels):
values = field_stacked[:, a_id]
if len(values[values > -1]) == 0:
mean = -1
else:
mean = np.mean(values[values > -1])
res[field][a_id] = mean
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self.fields:
res[field] = np.zeros((len(self.array_labels)), dtype=float)
field_stacked = np.array([res[field] for res in all_res.values()])
for a_id, alpha in enumerate(self.array_labels):
values = field_stacked[:, a_id]
if len(values[values > -1]) == 0:
mean = -1
else:
mean = np.mean(values[values > -1])
res[field][a_id] = mean
return res
def _compute_track_ig_masks(self, num_ids, track_lengths=None, track_areas=None, iscrowd=None,
is_not_exhaustively_labeled=False, is_gt=True):
"""
Computes ignore masks for different track sets to evaluate
:param num_ids: the number of track IDs
:param track_lengths: the lengths of the tracks (number of timesteps)
:param track_areas: the average area of a track
:param iscrowd: whether a track is marked as crowd
:param is_not_exhaustively_labeled: whether the track category is not exhaustively labeled
:param is_gt: whether it is gt
:return: the track ignore masks
"""
# for TAO, tracks of classes which are not exhaustively labeled are not evaluated
if not is_gt and is_not_exhaustively_labeled:
track_ig_masks = [[1 for _ in range(num_ids)] for i in range(self.num_ig_masks)]
else:
# consider all tracks
track_ig_masks = [[0 for _ in range(num_ids)]]
# consider tracks with certain area
if self.use_area_rngs:
for rng in self.area_rngs:
track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= area <= rng[1] + np.finfo('float').eps
else 1 for area in track_areas])
# consider tracks with certain duration
if self.use_time_rngs:
for rng in self.time_rngs:
track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= length
<= rng[1] + np.finfo('float').eps else 1 for length in track_lengths])
# for YouTubeVIS evaluation, tracks with the crowd tag are not evaluated
if is_gt and iscrowd:
track_ig_masks = [np.logical_or(mask, iscrowd) for mask in track_ig_masks]
return track_ig_masks
@staticmethod
def _compute_bb_track_iou(dt_track, gt_track, boxformat='xywh'):
"""
Calculates the track IoU for one detected track and one ground truth track for bounding boxes
:param dt_track: the detected track (format: dictionary with frame index as keys and
numpy arrays as values)
:param gt_track: the ground truth track (format: dictionary with frame index as keys and
numpy array as values)
:param boxformat: the format of the boxes
:return: the track IoU
"""
intersect = 0
union = 0
image_ids = set(gt_track.keys()) | set(dt_track.keys())
for image in image_ids:
g = gt_track.get(image, None)
d = dt_track.get(image, None)
if boxformat == 'xywh':
if d is not None and g is not None:
dx, dy, dw, dh = d
gx, gy, gw, gh = g
w = max(min(dx + dw, gx + gw) - max(dx, gx), 0)
h = max(min(dy + dh, gy + gh) - max(dy, gy), 0)
i = w * h
u = dw * dh + gw * gh - i
intersect += i
union += u
elif d is None and g is not None:
union += g[2] * g[3]
elif d is not None and g is None:
union += d[2] * d[3]
elif boxformat == 'x0y0x1y1':
if d is not None and g is not None:
dx0, dy0, dx1, dy1 = d
gx0, gy0, gx1, gy1 = g
w = max(min(dx1, gx1) - max(dx0, gx0), 0)
h = max(min(dy1, gy1) - max(dy0, gy0), 0)
i = w * h
u = (dx1 - dx0) * (dy1 - dy0) + (gx1 - gx0) * (gy1 - gy0) - i
intersect += i
union += u
elif d is None and g is not None:
union += (g[2] - g[0]) * (g[3] - g[1])
elif d is not None and g is None:
union += (d[2] - d[0]) * (d[3] - d[1])
else:
raise TrackEvalException('BoxFormat not implemented')
if intersect > union:
raise TrackEvalException("Intersection value > union value. Are the box values corrupted?")
return intersect / union if union > 0 else 0
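# Worked example (hypothetical xywh boxes): a detected track and a gt track sharing a
# single frame with boxes [0, 0, 10, 10] and [5, 5, 10, 10] intersect in a 5x5 region,
# so intersect = 25, union = 100 + 100 - 25 = 175 and the track IoU is 25/175 ≈ 0.14;
# frames where only one of the two tracks exists add to the union only.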
@staticmethod
def _compute_mask_track_iou(dt_track, gt_track):
"""
Calculates the track IoU for one detected track and one ground truth track for segmentation masks
:param dt_track: the detected track (format: dictionary with frame index as keys and
pycocotools rle encoded masks as values)
:param gt_track: the ground truth track (format: dictionary with frame index as keys and
pycocotools rle encoded masks as values)
:return: the track IoU
"""
# only loaded when needed to reduce minimum requirements
from pycocotools import mask as mask_utils
intersect = .0
union = .0
image_ids = set(gt_track.keys()) | set(dt_track.keys())
for image in image_ids:
g = gt_track.get(image, None)
d = dt_track.get(image, None)
if d and g:
intersect += mask_utils.area(mask_utils.merge([d, g], True))
union += mask_utils.area(mask_utils.merge([d, g], False))
elif not d and g:
union += mask_utils.area(g)
elif d and not g:
union += mask_utils.area(d)
if union < 0.0 - np.finfo('float').eps:
raise TrackEvalException("Union value < 0. Are the segmentaions corrupted?")
if intersect > union:
raise TrackEvalException("Intersection value > union value. Are the segmentations corrupted?")
iou = intersect / union if union > 0.0 + np.finfo('float').eps else 0.0
return iou
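# Sketch of the expected track format (an assumption based on the calls above): each
# track is a dict mapping frame index -> COCO RLE mask, e.g.
#   dt_track = {0: mask_utils.encode(np.asfortranarray(binary_mask.astype(np.uint8)))}
# mask_utils.merge with intersect=True/False then gives the per-frame intersection and
# union areas that are accumulated over the union of frame indices.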
@staticmethod
def _compute_track_ious(dt, gt, iou_function='bbox', boxformat='xywh'):
"""
Calculate track IoUs for a set of ground truth tracks and a set of detected tracks
"""
if len(gt) == 0 and len(dt) == 0:
return []
if iou_function == 'bbox':
track_iou_function = partial(TrackMAP._compute_bb_track_iou, boxformat=boxformat)
elif iou_function == 'mask':
track_iou_function = partial(TrackMAP._compute_mask_track_iou)
else:
raise Exception('IoU function not implemented')
ious = np.zeros([len(dt), len(gt)])
for i, j in np.ndindex(ious.shape):
ious[i, j] = track_iou_function(dt[i], gt[j])
return ious
@staticmethod
def _row_print(*argv):
"""Prints results in an evenly spaced rows, with more space in first row"""
if len(argv) == 1:
argv = argv[0]
to_print = '%-40s' % argv[0]
for v in argv[1:]:
to_print += '%-12s' % str(v)
print(to_print)

View File

@@ -0,0 +1,131 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing
class VACE(_BaseMetric):
"""Class which implements the VACE metrics.
The metrics are described in:
Manohar et al. (2006) "Performance Evaluation of Object Detection and Tracking in Video"
https://link.springer.com/chapter/10.1007/11612704_16
This implementation uses the "relaxed" variant of the metrics,
where an overlap threshold is applied in each frame.
"""
def __init__(self, config=None):
super().__init__()
self.integer_fields = ['VACE_IDs', 'VACE_GT_IDs', 'num_non_empty_timesteps']
self.float_fields = ['STDA', 'ATA', 'FDA', 'SFDA']
self.fields = self.integer_fields + self.float_fields
self.summary_fields = ['SFDA', 'ATA']
# Fields that are accumulated over multiple videos.
self._additive_fields = self.integer_fields + ['STDA', 'FDA']
self.threshold = 0.5
@_timing.time
def eval_sequence(self, data):
"""Calculates VACE metrics for one sequence.
Depends on the fields:
data['num_gt_ids']
data['num_tracker_ids']
data['gt_ids']
data['tracker_ids']
data['similarity_scores']
"""
res = {}
# Obtain Average Tracking Accuracy (ATA) using track correspondence.
# Obtain counts necessary to compute temporal IOU.
# Assume that integer counts can be represented exactly as floats.
potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
gt_id_count = np.zeros(data['num_gt_ids'])
tracker_id_count = np.zeros(data['num_tracker_ids'])
both_present_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
# Count the number of frames in which two tracks satisfy the overlap criterion.
matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
# Count the number of frames in which the tracks are present.
gt_id_count[gt_ids_t] += 1
tracker_id_count[tracker_ids_t] += 1
both_present_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += 1
# Number of frames in which either track is present (union of the two sets of frames).
union_count = (gt_id_count[:, np.newaxis]
+ tracker_id_count[np.newaxis, :]
- both_present_count)
# The denominator should always be non-zero if all tracks are non-empty.
with np.errstate(divide='raise', invalid='raise'):
temporal_iou = potential_matches_count / union_count
# Find assignment that maximizes temporal IOU.
match_rows, match_cols = linear_sum_assignment(-temporal_iou)
res['STDA'] = temporal_iou[match_rows, match_cols].sum()
res['VACE_IDs'] = data['num_tracker_ids']
res['VACE_GT_IDs'] = data['num_gt_ids']
# Obtain Frame Detection Accuracy (FDA) using per-frame correspondence.
non_empty_count = 0
fda = 0
for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
n_g = len(gt_ids_t)
n_d = len(tracker_ids_t)
if not (n_g or n_d):
continue
# n_g > 0 or n_d > 0
non_empty_count += 1
if not (n_g and n_d):
continue
# n_g > 0 and n_d > 0
spatial_overlap = data['similarity_scores'][t]
match_rows, match_cols = linear_sum_assignment(-spatial_overlap)
overlap_ratio = spatial_overlap[match_rows, match_cols].sum()
fda += overlap_ratio / (0.5 * (n_g + n_d))
res['FDA'] = fda
res['num_non_empty_timesteps'] = non_empty_count
res.update(self._compute_final_fields(res))
return res
def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
"""Combines metrics across all classes by averaging over the class values.
If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
"""
res = {}
for field in self.fields:
if ignore_empty_classes:
res[field] = np.mean([v[field] for v in all_res.values()
if v['VACE_GT_IDs'] > 0 or v['VACE_IDs'] > 0], axis=0)
else:
res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
return res
def combine_classes_det_averaged(self, all_res):
"""Combines metrics across all classes by averaging over the detection values"""
res = {}
for field in self._additive_fields:
res[field] = _BaseMetric._combine_sum(all_res, field)
res = self._compute_final_fields(res)
return res
def combine_sequences(self, all_res):
"""Combines metrics across all sequences"""
res = {}
for header in self._additive_fields:
res[header] = _BaseMetric._combine_sum(all_res, header)
res.update(self._compute_final_fields(res))
return res
@staticmethod
def _compute_final_fields(additive):
final = {}
with np.errstate(invalid='ignore'): # Permit nan results.
final['ATA'] = (additive['STDA'] /
(0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs'])))
final['SFDA'] = additive['FDA'] / additive['num_non_empty_timesteps']
return final
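# Worked example (hypothetical counts): with STDA = 3.0 accumulated over 4 tracker
# tracks and 6 gt tracks, ATA = 3.0 / (0.5 * (4 + 6)) = 0.6; with FDA = 45.0 over
# 100 non-empty timesteps, SFDA = 0.45.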

View File

@@ -0,0 +1,230 @@
import os
import numpy as np
from .utils import TrackEvalException
def plot_compare_trackers(tracker_folder, tracker_list, cls, output_folder, plots_list=None):
"""Create plots which compare metrics across different trackers."""
# Define what to plot
if plots_list is None:
plots_list = get_default_plots_list()
# Load data
data = load_multiple_tracker_summaries(tracker_folder, tracker_list, cls)
out_loc = os.path.join(output_folder, cls)
# Plot
for args in plots_list:
create_comparison_plot(data, out_loc, *args)
def get_default_plots_list():
# y_label, x_label, sort_label, bg_label, bg_function
plots_list = [
['AssA', 'DetA', 'HOTA', 'HOTA', 'geometric_mean'],
['AssPr', 'AssRe', 'HOTA', 'AssA', 'jaccard'],
['DetPr', 'DetRe', 'HOTA', 'DetA', 'jaccard'],
['HOTA(0)', 'LocA(0)', 'HOTA', 'HOTALocA(0)', 'multiplication'],
['HOTA', 'LocA', 'HOTA', None, None],
['HOTA', 'MOTA', 'HOTA', None, None],
['HOTA', 'IDF1', 'HOTA', None, None],
['IDF1', 'MOTA', 'HOTA', None, None],
]
return plots_list
def load_multiple_tracker_summaries(tracker_folder, tracker_list, cls):
"""Loads summary data for multiple trackers."""
data = {}
for tracker in tracker_list:
with open(os.path.join(tracker_folder, tracker, cls + '_summary.txt')) as f:
keys = next(f).split(' ')
done = False
while not done:
values = next(f).split(' ')
if len(values) == len(keys):
done = True
data[tracker] = dict(zip(keys, map(float, values)))
return data
def create_comparison_plot(data, out_loc, y_label, x_label, sort_label, bg_label=None, bg_function=None, settings=None):
""" Creates a scatter plot comparing multiple trackers between two metric fields, with one on the x-axis and the
other on the y axis. Adds pareto optical lines and (optionally) a background contour.
Inputs:
data: dict of dicts such that data[tracker_name][metric_field_name] = float
y_label: the metric_field_name to be plotted on the y-axis
x_label: the metric_field_name to be plotted on the x-axis
sort_label: the metric_field_name by which trackers are ordered and ranked
bg_label: the metric_field_name by which (optional) background contours are plotted
bg_function: the (optional) function bg_function(x,y) which converts the x_label / y_label values into bg_label.
settings: dict of plot settings with keys:
'gap_val': gap between axis ticks and bg curves.
'num_to_plot': maximum number of trackers to plot
"""
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Get plot settings
if settings is None:
gap_val = 2
num_to_plot = 20
else:
gap_val = settings['gap_val']
num_to_plot = settings['num_to_plot']
if (bg_label is None) != (bg_function is None):
raise TrackEvalException('bg_function and bg_label must either be both given or neither given.')
# Extract data
tracker_names = np.array(list(data.keys()))
sort_index = np.array([data[t][sort_label] for t in tracker_names]).argsort()[::-1]
x_values = np.array([data[t][x_label] for t in tracker_names])[sort_index][:num_to_plot]
y_values = np.array([data[t][y_label] for t in tracker_names])[sort_index][:num_to_plot]
# Print info on what is being plotted
tracker_names = tracker_names[sort_index][:num_to_plot]
print('\nPlotting %s vs %s, for the following (ordered) trackers:' % (y_label, x_label))
for i, name in enumerate(tracker_names):
print('%i: %s' % (i+1, name))
# Find best fitting boundaries for data
boundaries = _get_boundaries(x_values, y_values, round_val=gap_val/2)
fig = plt.figure()
# Plot background contour
if bg_function is not None:
_plot_bg_contour(bg_function, boundaries, gap_val)
# Plot pareto optimal lines
_plot_pareto_optimal_lines(x_values, y_values)
# Plot data points with number labels
labels = np.arange(len(y_values)) + 1
plt.plot(x_values, y_values, 'b.', markersize=15)
for xx, yy, l in zip(x_values, y_values, labels):
plt.text(xx, yy, str(l), color="red", fontsize=15)
# Add extra explanatory text to plots
plt.text(0, -0.11, 'label order:\nHOTA', horizontalalignment='left', verticalalignment='center',
transform=fig.axes[0].transAxes, color="red", fontsize=12)
if bg_label is not None:
plt.text(1, -0.11, 'curve values:\n' + bg_label, horizontalalignment='right', verticalalignment='center',
transform=fig.axes[0].transAxes, color="grey", fontsize=12)
plt.xlabel(x_label, fontsize=15)
plt.ylabel(y_label, fontsize=15)
title = y_label + ' vs ' + x_label
if bg_label is not None:
title += ' (' + bg_label + ')'
plt.title(title, fontsize=17)
plt.xticks(np.arange(0, 100, gap_val))
plt.yticks(np.arange(0, 100, gap_val))
min_x, max_x, min_y, max_y = boundaries
plt.xlim(min_x, max_x)
plt.ylim(min_y, max_y)
plt.gca().set_aspect('equal', adjustable='box')
plt.tight_layout()
os.makedirs(out_loc, exist_ok=True)
filename = os.path.join(out_loc, title.replace(' ', '_'))
plt.savefig(filename + '.pdf', bbox_inches='tight', pad_inches=0.05)
plt.savefig(filename + '.png', bbox_inches='tight', pad_inches=0.05)
def _get_boundaries(x_values, y_values, round_val):
x1 = np.min(np.floor((x_values - 0.5) / round_val) * round_val)
x2 = np.max(np.ceil((x_values + 0.5) / round_val) * round_val)
y1 = np.min(np.floor((y_values - 0.5) / round_val) * round_val)
y2 = np.max(np.ceil((y_values + 0.5) / round_val) * round_val)
x_range = x2 - x1
y_range = y2 - y1
max_range = max(x_range, y_range)
x_center = (x1 + x2) / 2
y_center = (y1 + y2) / 2
min_x = max(x_center - max_range / 2, 0)
max_x = min(x_center + max_range / 2, 100)
min_y = max(y_center - max_range / 2, 0)
max_y = min(y_center + max_range / 2, 100)
return min_x, max_x, min_y, max_y
def geometric_mean(x, y):
return np.sqrt(x * y)
def jaccard(x, y):
x = x / 100
y = y / 100
return 100 * (x * y) / (x + y - x * y)
def multiplication(x, y):
return x * y / 100
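# Illustrative values: for x = y = 50, geometric_mean gives 50.0,
# jaccard gives 100 * 0.25 / 0.75 ≈ 33.3 and multiplication gives 25.0.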
bg_function_dict = {
"geometric_mean": geometric_mean,
"jaccard": jaccard,
"multiplication": multiplication,
}
def _plot_bg_contour(bg_function, plot_boundaries, gap_val):
""" Plot background contour. """
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Plot background contour
min_x, max_x, min_y, max_y = plot_boundaries
x = np.arange(min_x, max_x, 0.1)
y = np.arange(min_y, max_y, 0.1)
x_grid, y_grid = np.meshgrid(x, y)
if bg_function in bg_function_dict.keys():
z_grid = bg_function_dict[bg_function](x_grid, y_grid)
else:
raise TrackEvalException("background plotting function '%s' is not defined." % bg_function)
levels = np.arange(0, 100, gap_val)
con = plt.contour(x_grid, y_grid, z_grid, levels, colors='grey')
def bg_format(val):
s = '{:1f}'.format(val)
return '{:.0f}'.format(val) if s[-1] == '0' else s
con.levels = [bg_format(val) for val in con.levels]
plt.clabel(con, con.levels, inline=True, fmt='%r', fontsize=8)
def _plot_pareto_optimal_lines(x_values, y_values):
""" Plot pareto optimal lines """
# Only loaded when run to reduce minimum requirements
from matplotlib import pyplot as plt
# Plot pareto optimal lines
cxs = x_values
cys = y_values
best_y = np.argmax(cys)
x_pareto = [0, cxs[best_y]]
y_pareto = [cys[best_y], cys[best_y]]
t = 2
remaining = cxs > x_pareto[t - 1]
cys = cys[remaining]
cxs = cxs[remaining]
while len(cxs) > 0 and len(cys) > 0:
best_y = np.argmax(cys)
x_pareto += [x_pareto[t - 1], cxs[best_y]]
y_pareto += [cys[best_y], cys[best_y]]
t += 2
remaining = cxs > x_pareto[t - 1]
cys = cys[remaining]
cxs = cxs[remaining]
x_pareto.append(x_pareto[t - 1])
y_pareto.append(0)
plt.plot(np.array(x_pareto), np.array(y_pareto), '--r')

View File

@@ -0,0 +1,146 @@
import os
import csv
import argparse
from collections import OrderedDict
def init_config(config, default_config, name=None):
"""Initialise non-given config values with defaults"""
if config is None:
config = default_config
else:
for k in default_config.keys():
if k not in config.keys():
config[k] = default_config[k]
if name and config['PRINT_CONFIG']:
print('\n%s Config:' % name)
for c in config.keys():
print('%-20s : %-30s' % (c, config[c]))
return config
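# Minimal usage sketch (hypothetical caller): a metric or dataset class typically calls
#   config = init_config(user_config, get_default_config(), 'MyMetric')
# so that any key missing from user_config falls back to its default and the merged
# config is printed when PRINT_CONFIG is set.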
def update_config(config):
"""
Parses the command-line arguments of a script and updates the corresponding config values when they are specified.
:param config: the config to update
:return: the updated config
"""
parser = argparse.ArgumentParser()
for setting in config.keys():
if type(config[setting]) == list or type(config[setting]) == type(None):
parser.add_argument("--" + setting, nargs='+')
else:
parser.add_argument("--" + setting)
args = parser.parse_args().__dict__
for setting in args.keys():
if args[setting] is not None:
if type(config[setting]) == type(True):
if args[setting] == 'True':
x = True
elif args[setting] == 'False':
x = False
else:
raise Exception('Command line parameter ' + setting + ' must be True or False')
elif type(config[setting]) == type(1):
x = int(args[setting])
elif type(args[setting]) == type(None):
x = None
else:
x = args[setting]
config[setting] = x
return config
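# Example (hypothetical script): if the config contains {'SPLIT': 'test', 'PRINT_CONFIG': True},
# running
#   python run.py --SPLIT val --PRINT_CONFIG False
# overrides SPLIT with the string 'val' and parses PRINT_CONFIG back into the boolean False;
# list- or None-valued settings accept several values via nargs='+'.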
def get_code_path():
"""Get base path where code is"""
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
def validate_metrics_list(metrics_list):
"""Get names of metric class and ensures they are unique, further checks that the fields within each metric class
do not have overlapping names.
"""
metric_names = [metric.get_name() for metric in metrics_list]
# check metric names are unique
if len(metric_names) != len(set(metric_names)):
raise TrackEvalException('Code being run with multiple metrics of the same name')
fields = []
for m in metrics_list:
fields += m.fields
# check metric fields are unique
if len(fields) != len(set(fields)):
raise TrackEvalException('Code being run with multiple metrics with fields of the same name')
return metric_names
def write_summary_results(summaries, cls, output_folder):
"""Write summary results to file"""
fields = sum([list(s.keys()) for s in summaries], [])
values = sum([list(s.values()) for s in summaries], [])
# In order to remain consistent as new fields are added, each of the following fields, if present, is output in
# the summary first, in the order below. Any further fields are output in the order each metric family is called,
# and within each family either in the order they were added to the dict (python >= 3.6) or
# randomly (python < 3.6).
default_order = ['HOTA', 'DetA', 'AssA', 'DetRe', 'DetPr', 'AssRe', 'AssPr', 'LocA', 'OWTA', 'HOTA(0)', 'LocA(0)',
'HOTALocA(0)', 'MOTA', 'MOTP', 'MODA', 'CLR_Re', 'CLR_Pr', 'MTR', 'PTR', 'MLR', 'CLR_TP', 'CLR_FN',
'CLR_FP', 'IDSW', 'MT', 'PT', 'ML', 'Frag', 'sMOTA', 'IDF1', 'IDR', 'IDP', 'IDTP', 'IDFN', 'IDFP',
'Dets', 'GT_Dets', 'IDs', 'GT_IDs']
default_ordered_dict = OrderedDict(zip(default_order, [None for _ in default_order]))
for f, v in zip(fields, values):
default_ordered_dict[f] = v
for df in default_order:
if default_ordered_dict[df] is None:
del default_ordered_dict[df]
fields = list(default_ordered_dict.keys())
values = list(default_ordered_dict.values())
out_file = os.path.join(output_folder, cls + '_summary.txt')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f, delimiter=' ')
writer.writerow(fields)
writer.writerow(values)
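# The resulting <cls>_summary.txt is a two-line, space-delimited file: the first line holds
# the (reordered) field names and the second their values, e.g. (illustrative numbers only)
#   HOTA DetA AssA
#   45.2 50.1 41.3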
def write_detailed_results(details, cls, output_folder):
"""Write detailed results to file"""
sequences = details[0].keys()
fields = ['seq'] + sum([list(s['COMBINED_SEQ'].keys()) for s in details], [])
out_file = os.path.join(output_folder, cls + '_detailed.csv')
os.makedirs(os.path.dirname(out_file), exist_ok=True)
with open(out_file, 'w', newline='') as f:
writer = csv.writer(f)
writer.writerow(fields)
for seq in sorted(sequences):
if seq == 'COMBINED_SEQ':
continue
writer.writerow([seq] + sum([list(s[seq].values()) for s in details], []))
writer.writerow(['COMBINED'] + sum([list(s['COMBINED_SEQ'].values()) for s in details], []))
def load_detail(file):
"""Loads detailed data for a tracker."""
data = {}
with open(file) as f:
for i, row_text in enumerate(f):
row = row_text.replace('\r', '').replace('\n', '').split(',')
if i == 0:
keys = row[1:]
continue
current_values = row[1:]
seq = row[0]
if seq == 'COMBINED':
seq = 'COMBINED_SEQ'
if (len(current_values) == len(keys)) and seq != '':
data[seq] = {}
for key, value in zip(keys, current_values):
data[seq][key] = float(value)
return data
class TrackEvalException(Exception):
"""Custom exception for catching expected errors."""
...

View File

@@ -0,0 +1,36 @@
"""
set gpus and random seeds
"""
import os
import random
import numpy as np
from loguru import logger
import torch
import torch.backends.cudnn as cudnn
def select_device(device):
""" set device
Args:
device: str, 'cpu' or '0' or '1,2,3'-like
Return:
torch.device
"""
if device == 'cpu':
logger.info('Use CPU for training')
elif ',' in device: # multi-gpu
logger.error('Multi-GPU currently not supported')
else:
logger.info(f'set gpu {device}')
os.environ['CUDA_VISIBLE_DEVICES'] = device
assert torch.cuda.is_available()
cuda = device != 'cpu' and torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
return device
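# Usage sketch: select_device('0') pins the process to GPU 0 via CUDA_VISIBLE_DEVICES and
# returns torch.device('cuda:0'), while select_device('cpu') returns torch.device('cpu').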

View File

@@ -0,0 +1,26 @@
import numpy as np
import cv2
import os
def save_results(folder_name, seq_name, results, data_type='default'):
"""
Write tracking results to a txt file under ./track_results/<folder_name>/<seq_name>.txt.
results: list of per-frame tuples (frame id, target ids, tlwh boxes, classes, scores)
data_type: 'default' | 'mot_challenge', output data format (default or MOT submission)
"""
assert len(results)
if not os.path.exists(f'./track_results/{folder_name}'):
os.makedirs(f'./track_results/{folder_name}')
with open(os.path.join('./track_results', folder_name, seq_name + '.txt'), 'w') as f:
for frame_id, target_ids, tlwhs, clses, scores in results:
for id, tlwh, score in zip(target_ids, tlwhs, scores):
f.write(f'{frame_id},{id},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{score:.2f},-1,-1,-1\n')
return folder_name
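# Each written row follows the MOT-style layout frame,id,x,y,w,h,score,-1,-1,-1,
# e.g. (illustrative) 1,3,102.50,44.00,35.00,80.00,0.91,-1,-1,-1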

View File

@@ -0,0 +1,64 @@
import cv2
import os
import numpy as np
from PIL import Image
def plot_img(img, frame_id, results, save_dir):
"""
img: np.ndarray: (H, W, C)
frame_id: int
results: [tlwhs, ids, clses]
save_dir: str
plot one frame of a sequence with its bounding boxes drawn
"""
if not os.path.exists(save_dir):
os.makedirs(save_dir)
assert img is not None
if len(img.shape) > 3:
img = img.squeeze(0)
img_ = np.ascontiguousarray(np.copy(img))
tlwhs, ids, clses = results[0], results[1], results[2]
for tlwh, id, cls in zip(tlwhs, ids, clses):
# convert tlwh to tlbr
tlbr = tuple([int(tlwh[0]), int(tlwh[1]), int(tlwh[0] + tlwh[2]), int(tlwh[1] + tlwh[3])])
# draw a rect
cv2.rectangle(img_, tlbr[:2], tlbr[2:], get_color(id), thickness=3, )
# note the id and cls
text = f'{int(cls)}_{id}'
cv2.putText(img_, text, (tlbr[0], tlbr[1]), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1,
color=(255, 164, 0), thickness=2)
cv2.imwrite(filename=os.path.join(save_dir, f'{frame_id:05d}.jpg'), img=img_)
def get_color(idx):
"""
aux func for plot_seq
get a unique color for each id
"""
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def save_video(images_path):
"""
save images (frames) to a video
"""
images_list = sorted(os.listdir(images_path))
save_video_path = os.path.join(images_path, images_path.split('/')[-1] + '.mp4')
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
img0 = Image.open(os.path.join(images_path, images_list[0]))
vw = cv2.VideoWriter(save_video_path, fourcc, 15, img0.size)
for image_name in images_list:
image = cv2.imread(filename=os.path.join(images_path, image_name))
vw.write(image)
vw.release()

View File

@@ -0,0 +1,16 @@
from utils.general import non_max_suppression, scale_coords
def postprocess(out, conf_thresh, nms_thresh, img_size, ori_img_size):
"""
Args:
out: raw output from the YOLOv7-style model
conf_thresh, nms_thresh: confidence and NMS IoU thresholds
img_size, ori_img_size: network input size and original image size, used to rescale the boxes
"""
out = out[0]
out = non_max_suppression(out, conf_thresh, nms_thresh, )[0]
out[:, :4] = scale_coords(img_size, out[:, :4], ori_img_size, ratio_pad=None).round()
# out: tlbr, conf, cls
return out
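# Usage sketch (assuming a YOLOv7-style forward pass whose first element holds the raw predictions):
#   preds = postprocess(model(img_tensor), 0.25, 0.45, img_tensor.shape[2:], ori_img.shape[:2])
# each row of preds is then (x1, y1, x2, y2, confidence, class) in original-image coordinates.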

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/airmot/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/airmot/test.txt
nc: 2
names: ['plane', 'ship']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/uavdt/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/uavdt/test.txt
nc: 1
names: ['car']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone/test.txt
nc: 5
names: ['pedestrain', 'car', 'van', 'truck', 'bus']

View File

@@ -0,0 +1,7 @@
train: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/train.txt
val: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt
test: /data/wujiapeng/codes/DroneGraphTracker/visdrone_det/test.txt
nc: 5
names: ['pedestrain', 'car', 'van', 'truck', 'bus']

View File

@@ -0,0 +1,6 @@
from ultralytics import YOLO
def postprocess(out):
out = out[0].boxes
return out.data
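# Usage sketch (ultralytics API): for results = YOLO('yolov8s.pt')(frame), postprocess(results)
# returns results[0].boxes.data, whose rows are (x1, y1, x2, y2, confidence, class).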

View File

@@ -0,0 +1,36 @@
import torch
from ultralytics import YOLO
import numpy as np
import argparse
def main(args):
""" main func
"""
model = YOLO(model=args.model_weight)
model.train(
data=args.data_cfg,
epochs=args.epochs,
batch=args.batch_size,
imgsz=args.img_sz,
patience=50, # epochs to wait for no observable improvement for early stopping of training
device=args.device,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser("YOLO v8 train parser")
parser.add_argument('--model', type=str, default='yolov8s.yaml', help='yaml or pt file')
parser.add_argument('--model_weight', type=str, default='yolov8s.pt', help='')
parser.add_argument('--data_cfg', type=str, default='yolov8_utils/data_cfgs/visdrone.yaml', help='')
parser.add_argument('--epochs', type=int, default=30, help='')
parser.add_argument('--batch_size', type=int, default=8, help='')
parser.add_argument('--img_sz', type=int, default=1280, help='')
parser.add_argument('--device', type=str, default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
args = parser.parse_args()
main(args)
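# Example invocation (script name assumed):
#   python yolov8_train.py --model_weight yolov8s.pt --data_cfg yolov8_utils/data_cfgs/visdrone.yaml --epochs 30 --device 0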

View File

@@ -0,0 +1,155 @@
import cv2
import numpy as np
from pycocotools.coco import COCO
import os
from yolox.data.datasets import Dataset
class MOTDataset(Dataset):
"""
COCO dataset class.
"""
def __init__(
self,
data_dir=None,
json_file="train_half.json",
name="train",
img_size=(608, 1088),
preproc=None,
):
"""
COCO dataset initialization. Annotation data are read into memory by COCO API.
Args:
data_dir (str): dataset root directory
json_file (str): COCO json file name
name (str): COCO data name (e.g. 'train2017' or 'val2017')
img_size (tuple): target image size (h, w) after pre-processing
preproc: data augmentation strategy
"""
super().__init__(img_size)
self.data_dir = data_dir
self.json_file = json_file
self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
self.ids = self.coco.getImgIds()
self.class_ids = sorted(self.coco.getCatIds())
cats = self.coco.loadCats(self.coco.getCatIds())
self._classes = tuple([c["name"] for c in cats])
self.annotations = self._load_coco_annotations()
self.name = name
self.img_size = img_size
self.preproc = preproc
def __len__(self):
return len(self.ids)
def _load_coco_annotations(self):
return [self.load_anno_from_ids(_ids) for _ids in self.ids]
def load_anno_from_ids(self, id_):
im_ann = self.coco.loadImgs(id_)[0]
width = im_ann["width"]
height = im_ann["height"]
frame_id = im_ann["frame_id"]
video_id = im_ann["video_id"]
anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
annotations = self.coco.loadAnns(anno_ids)
objs = []
for obj in annotations:
x1 = obj["bbox"][0]
y1 = obj["bbox"][1]
x2 = x1 + obj["bbox"][2]
y2 = y1 + obj["bbox"][3]
if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
obj["clean_bbox"] = [x1, y1, x2, y2]
objs.append(obj)
num_objs = len(objs)
res = np.zeros((num_objs, 6))
for ix, obj in enumerate(objs):
cls = self.class_ids.index(obj["category_id"])
res[ix, 0:4] = obj["clean_bbox"]
res[ix, 4] = cls
res[ix, 5] = obj["track_id"]
file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
img_info = (height, width, frame_id, video_id, file_name)
del im_ann, annotations
return (res, img_info, file_name)
def load_anno(self, index):
return self.annotations[index][0]
def pull_item(self, index):
id_ = self.ids[index]
res, img_info, file_name = self.annotations[index]
# load image and preprocess
img_file = os.path.join(
self.data_dir, 'images', self.name, file_name
)
# for debug
# print(f"************{img_file}************")
# exit()
img = cv2.imread(img_file)
assert img is not None
return img, res.copy(), img_info, np.array([id_])
@Dataset.resize_getitem
def __getitem__(self, index):
"""
One image / label pair for the given index is picked up and pre-processed.
Args:
index (int): data index
Returns:
img (numpy.ndarray): pre-processed image
padded_labels (torch.Tensor): pre-processed label data.
The shape is :math:`[max_labels, 5]`.
each label consists of [class, xc, yc, w, h]:
class (float): class index.
xc, yc (float) : center of bbox whose values range from 0 to 1.
w, h (float) : size of bbox whose values range from 0 to 1.
info_img : tuple of h, w, nh, nw, dx, dy.
h, w (int): original shape of the image
nh, nw (int): shape of the resized image without padding
dx, dy (int): pad size
img_id (int): same as the input index. Used for evaluation.
"""
img, target, img_info, img_id = self.pull_item(index)
if self.preproc is not None:
img, target = self.preproc(img, target, self.input_dim)
return img, target, img_info, img_id
class VisDroneDataset(MOTDataset):
def __init__(self, data_dir=None, json_file="train_half.json", name="train", img_size=(608, 1088), preproc=None):
super().__init__(data_dir, json_file, name, img_size, preproc)
self.DATA_ROOT = '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019'
self.VisD_dict = {'train':'VisDrone2019-MOT-train',
'test':'VisDrone2019-MOT-test-dev'}
def pull_item(self, index):
id_ = self.ids[index]
res, img_info, file_name = self.annotations[index]
# load image and preprocess
# img_file = os.path.join(
# self.data_dir, self.name, file_name
# )
img_file = os.path.join(
self.DATA_ROOT, self.VisD_dict[self.name], 'sequences', file_name
)
img = cv2.imread(img_file)
assert img is not None
return img, res.copy(), img_info, np.array([id_])

View File

@@ -0,0 +1,29 @@
import torch
from yolox.utils import postprocess
def postprocess_yolox(out, num_classes, conf_thresh, img, ori_img):
"""
convert out to -> (tlbr, conf, cls)
"""
out = postprocess(out, num_classes, conf_thresh, )[0] # (tlbr, obj_conf, cls_conf, cls)
if out is None: return out
# merge conf
out[:, 4] *= out[:, 5]
out[:, 5] = out[:, -1]
out = out[:, :-1]
# scale to origin size
img_size = [img.shape[-2], img.shape[-1]] # h, w
ori_img_size = [ori_img.shape[0], ori_img.shape[1]] # h0, w0
img_h, img_w = img_size[0], img_size[1]
scale = min(float(img_h) / ori_img_size[0], float(img_w) / ori_img_size[1])
out[:, :4] /= scale
return out
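# Usage sketch: given the raw YOLOX head output, the letterboxed input tensor img and the
# original frame ori_img, the returned rows are (x1, y1, x2, y2, score, class) rescaled back
# to the original resolution, where score is objectness times class confidence.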

View File

@@ -0,0 +1,122 @@
from loguru import logger
import torch
import torch.backends.cudnn as cudnn
from yolox.core import Trainer, launch
from yolox.exp import get_exp
import argparse
import random
import warnings
def make_parser():
parser = argparse.ArgumentParser("YOLOX train parser")
parser.add_argument("-expn", "--experiment-name", type=str, default=None)
parser.add_argument("-n", "--name", type=str, default=None, help="model name")
# distributed
parser.add_argument(
"--dist-backend", default="nccl", type=str, help="distributed backend"
)
parser.add_argument(
"--dist-url",
default=None,
type=str,
help="url used to set up distributed training",
)
parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size")
parser.add_argument(
"-d", "--devices", default=None, type=int, help="device for training"
)
parser.add_argument(
"--local_rank", default=0, type=int, help="local rank for dist training"
)
parser.add_argument(
"-f",
"--exp_file",
default=None,
type=str,
help="plz input your expriment description file",
)
parser.add_argument(
"--resume", default=False, action="store_true", help="resume training"
)
parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file")
parser.add_argument(
"-e",
"--start_epoch",
default=None,
type=int,
help="resume training start epoch",
)
parser.add_argument(
"--num_machines", default=1, type=int, help="num of node for training"
)
parser.add_argument(
"--machine_rank", default=0, type=int, help="node rank for multi-node training"
)
parser.add_argument(
"--fp16",
dest="fp16",
default=True,
action="store_true",
help="Adopting mix precision training.",
)
parser.add_argument(
"-o",
"--occupy",
dest="occupy",
default=False,
action="store_true",
help="occupy GPU memory first for training.",
)
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
return parser
@logger.catch
def main(exp, args):
if exp.seed is not None:
random.seed(exp.seed)
torch.manual_seed(exp.seed)
cudnn.deterministic = True
warnings.warn(
"You have chosen to seed training. This will turn on the CUDNN deterministic setting, "
"which can slow down your training considerably! You may see unexpected behavior "
"when restarting from checkpoints."
)
# set environment variables for distributed training
cudnn.benchmark = True
trainer = Trainer(exp, args)
trainer.train()
if __name__ == "__main__":
args = make_parser().parse_args()
exp = get_exp(args.exp_file, args.name)
exp.merge(args.opts)
if not args.experiment_name:
args.experiment_name = exp.exp_name
num_gpu = torch.cuda.device_count() if args.devices is None else args.devices
assert num_gpu <= torch.cuda.device_count()
launch(
main,
num_gpu,
args.num_machines,
args.machine_rank,
backend=args.dist_backend,
dist_url=args.dist_url,
args=(exp, args),
)

View File

@@ -0,0 +1,144 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist
from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir
class Exp(MyExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1 # 1 for uavdt mot17
self.depth = 0.67
self.width = 0.75
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_ann = "train.json"
self.val_ann = "test.json"
self.input_size = (800, 1440)
self.test_size = (800, 1440)
self.random_size = (18, 32)
self.max_epoch = 80
self.print_interval = 20
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
self.warmup_epochs = 1
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)
from mot_dataset import MOTDataset
dataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
# data_dir='/data/wujiapeng/datasets/UAVDT',
data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
json_file=self.train_ann,
name='train',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=500,
),
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1000,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
def get_eval_loader(self, batch_size, is_distributed, testdev=False):
from yolox.data import ValTransform
from mot_dataset import MOTDataset
valdataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
# data_dir='/data/wujiapeng/datasets/UAVDT',
data_dir='/data/wujiapeng/datasets/VisDrone2019/VisDrone2019',
json_file=self.val_ann,
img_size=self.test_size,
name='test',
preproc=ValTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator

View File

@@ -0,0 +1,142 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist
from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir
class Exp(MyExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1
self.depth = 1.33
self.width = 1.25
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_ann = "train.json"
self.val_ann = "test.json"
self.input_size = (800, 1440)
self.test_size = (800, 1440)
self.random_size = (18, 32)
self.max_epoch = 80
self.print_interval = 20
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
self.warmup_epochs = 1
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)
from mot_dataset import MOTDataset
dataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir='/data/wujiapeng/datasets/UAVDT',
json_file=self.train_ann,
name='train',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=500,
),
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1000,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
def get_eval_loader(self, batch_size, is_distributed, testdev=False):
from yolox.data import ValTransform
from mot_dataset import MOTDataset
valdataset = MOTDataset(
# data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir='/data/wujiapeng/datasets/UAVDT',
json_file=self.val_ann,
img_size=self.test_size,
name='test',
preproc=ValTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator