Mirror of https://github.com/NohamR/Stage-2024.git (synced 2026-01-11 16:58:23 +00:00)
133
yolov7-tracker-example/tracker/trackers/basetrack.py
Normal file
@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict


class TrackState(object):
    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack(object):
    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        return self.frame_id

    @staticmethod
    def next_id():
        BaseTrack._count += 1
        return BaseTrack._count

    def activate(self, *args):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        raise NotImplementedError

    def mark_lost(self):
        self.state = TrackState.Lost

    def mark_removed(self):
        self.state = TrackState.Removed

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @property
    def xywh(self):
        """Convert bounding box to format `(center x, center y, width, height)`."""
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2.0
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret

    @staticmethod
    def tlwh_to_xysa(tlwh):
        """Convert bounding box to format `(center x, center y, scale (area),
        aspect ratio)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] = tlwh[2] * tlwh[3]
        ret[3] = tlwh[2] / tlwh[3]
        return ret

    def to_xyah(self):
        return self.tlwh_to_xyah(self.tlwh)

    def to_xywh(self):
        return self.tlwh_to_xywh(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
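The conversion helpers above are plain NumPy functions and can be sanity-checked in isolation. A minimal sketch with illustrative values (assuming basetrack.py is importable from your working directory):

import numpy as np
from basetrack import BaseTrack  # adjust the import path to where basetrack.py lives

tlwh = np.array([10., 20., 100., 50.])   # top-left (10, 20), 100x50 box

print(BaseTrack.tlwh_to_tlbr(tlwh))      # [ 10.  20. 110.  70.]  corner format
print(BaseTrack.tlwh_to_xyah(tlwh))      # [60. 45.  2. 50.]  center, aspect (w/h), height
print(BaseTrack.tlwh_to_xysa(tlwh))      # [  60.   45. 5000.    2.]  center, area, aspect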
329
yolov7-tracker-example/tracker/trackers/botsort_tracker.py
Normal file
@@ -0,0 +1,329 @@
"""
BoT-SORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

from .camera_motion_compensation import GMC

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model

class BotTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, h, w)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        # camera motion compensation module
        self.gmc = GMC(method='orb', downscale=2, verbose=None)

    def reid_preprocess(self, obj_bbox):
        """
        preprocess cropped object bboxes

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 128)
        """
        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128))  # shape: (128, 128, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))
            # if any(tlbr_ == -1 for tlbr_ in tlwh):
            #     print(tlwh)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()


    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        """Step 1: Extract reid features"""
        if self.with_reid:
            features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            if self.with_reid:
                detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                              (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
            else:
                detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                              (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        ious_dists = iou_distance(tracklet_pool, detections)
        ious_dists_mask = (ious_dists > 0.5)  # high-confidence iou

        if self.with_reid:
            # mixed cost matrix
            emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)

        else:
            dists = ious_dists

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        ious_dists = iou_distance(unconfirmed, detections)
        ious_dists_mask = (ious_dists > 0.5)

        if self.with_reid:
            emb_dists = embedding_distance(unconfirmed, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)
        else:
            dists = ious_dists

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
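The cost fusion used twice inside BotTracker.update() — threshold the embedding cost, mask it wherever spatial overlap is poor, then take the element-wise minimum with the IoU cost — can be exercised standalone. A minimal sketch with made-up cost matrices:

import numpy as np

ious_dists = np.array([[0.2, 0.8],
                       [0.6, 0.3]])       # 1 - IoU per track/detection pair
emb_dists = np.array([[0.10, 0.40],
                      [0.30, 0.05]])      # halved cosine distances

ious_dists_mask = ious_dists > 0.5        # pairs with poor spatial overlap
emb_dists[emb_dists > 0.25] = 1.0         # discard weak appearance matches
emb_dists[ious_dists_mask] = 1.0          # never trust appearance without overlap
print(np.minimum(ious_dists, emb_dists))  # [[0.1  0.8 ]
                                          #  [0.6  0.05]]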
201
yolov7-tracker-example/tracker/trackers/byte_tracker.py
Normal file
@@ -0,0 +1,201 @@
"""
ByteTrack
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *

class ByteTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = iou_distance(tracklet_pool, detections)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
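The score split at the top of ByteTracker.update() is the core of ByteTrack: detections above conf_thresh drive the first association, detections between 0.1 and conf_thresh drive the second, and everything below 0.1 is discarded. A standalone sketch, assuming conf_thresh = 0.5:

import numpy as np

conf_thresh = 0.5
scores = np.array([0.9, 0.45, 0.05, 0.6, 0.2])

remain_inds = scores > conf_thresh
inds_second = np.logical_and(scores > 0.1, scores < conf_thresh)

print(np.flatnonzero(remain_inds))   # [0 3] -> first association
print(np.flatnonzero(inds_second))   # [1 4] -> second association (0.05 is dropped)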
204
yolov7-tracker-example/tracker/trackers/c_biou_tracker.py
Normal file
@@ -0,0 +1,204 @@
"""
C-BIoU Tracker
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *

class C_BIoUTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = buffered_iou_distance(tracklet_pool, detections, level=1)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]

        dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)

        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = buffered_iou_distance(unconfirmed, detections, level=1)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
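buffered_iou_distance is imported from .matching and its implementation is not part of this diff. To illustrate the buffered-IoU idea behind C-BIoU (inflate both boxes by a buffer ratio before computing IoU, so nearby but non-overlapping boxes can still be matched), here is a hypothetical standalone sketch — not the project's implementation:

import numpy as np

def buffered_iou(tlwh_a, tlwh_b, b=0.3):
    # Hypothetical illustration: expand each tlwh box by buffer ratio b on every side.
    def expand(t):
        x, y, w, h = np.asarray(t, dtype=float)
        return np.array([x - b * w, y - b * h, w * (1 + 2 * b), h * (1 + 2 * b)])
    a, c = expand(tlwh_a), expand(tlwh_b)
    x1, y1 = max(a[0], c[0]), max(a[1], c[1])
    x2 = min(a[0] + a[2], c[0] + c[2])
    y2 = min(a[1] + a[3], c[1] + c[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    return inter / (a[2] * a[3] + c[2] * c[3] - inter)

print(buffered_iou([0, 0, 10, 10], [12, 0, 10, 10]))  # ~0.14: positive despite no raw overlap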
264
yolov7-tracker-example/tracker/trackers/camera_motion_compensation.py (file name inferred from the `from .camera_motion_compensation import GMC` import above)
Normal file
@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt

"""GMC Module"""
class GMC:
    def __init__(self, method='orb', downscale=2, verbose=None):
        super(GMC, self).__init__()

        self.method = method
        self.downscale = max(1, int(downscale))

        if self.method == 'orb':
            self.detector = cv2.FastFeatureDetector_create(20)
            self.extractor = cv2.ORB_create()
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        elif self.method == 'sift':
            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

        elif self.method == 'ecc':
            number_of_iterations = 100
            termination_eps = 1e-5
            self.warp_mode = cv2.MOTION_EUCLIDEAN
            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

        elif self.method == 'file' or self.method == 'files':
            seqName = verbose[0]
            ablation = verbose[1]
            if ablation:
                filePath = r'tracker/GMC_files/MOT17_ablation'
            else:
                filePath = r'tracker/GMC_files/MOTChallenge'

            if '-FRCNN' in seqName:
                seqName = seqName[:-6]
            elif '-DPM' in seqName:
                seqName = seqName[:-4]
            elif '-SDP' in seqName:
                seqName = seqName[:-4]

            self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')

            if self.gmcFile is None:
                raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
        elif self.method == 'none' or self.method == 'None':
            self.method = 'none'
        else:
            raise ValueError("Error: Unknown CMC method:" + method)

        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None

        self.initializedFirstFrame = False

    def apply(self, raw_frame, detections=None):
        if self.method == 'orb' or self.method == 'sift':
            return self.applyFeatures(raw_frame, detections)
        elif self.method == 'ecc':
            return self.applyEcc(raw_frame, detections)
        elif self.method == 'file':
            return self.applyFile(raw_frame, detections)
        elif self.method == 'none':
            return np.eye(2, 3)
        else:
            return np.eye(2, 3)

    def applyEcc(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3, dtype=np.float32)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Run the ECC algorithm. The results are stored in warp_matrix.
        # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
        try:
            (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
        except:
            print('Warning: find transform failed. Set warp as identity')

        return H

    def applyFeatures(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # find the keypoints
        mask = np.zeros_like(frame)
        # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
        mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
        if detections is not None:
            for det in detections:
                tlbr = (det[:4] / self.downscale).astype(np.int_)
                mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0

        keypoints = self.detector.detect(frame, mask)

        # compute the descriptors
        keypoints, descriptors = self.extractor.compute(frame, keypoints)

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Match descriptors.
        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

        # Filter matches based on smallest spatial distance
        matches = []
        spatialDistances = []

        maxSpatialDistance = 0.25 * np.array([width, height])

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            return H

        for m, n in knnMatches:
            if m.distance < 0.9 * n.distance:
                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
                                   prevKeyPointLocation[1] - currKeyPointLocation[1])

                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
                        (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
                    spatialDistances.append(spatialDistance)
                    matches.append(m)

        meanSpatialDistances = np.mean(spatialDistances, 0)
        stdSpatialDistances = np.std(spatialDistances, 0)

        inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # Draw the keypoint matches on the output image
        if 0:
            matches_img = np.hstack((self.prevFrame, frame))
            matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
            W = np.size(self.prevFrame, 1)
            for m in goodMatches:
                prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
                curr_pt[0] += W
                color = np.random.randint(0, 255, (3,))
                color = (int(color[0]), int(color[1]), int(color[2]))

                matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
                matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
                matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)

            plt.figure()
            plt.imshow(matches_img)
            plt.show()

        # Find rigid matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

            # Handle downscale
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            print('Warning: not enough matching points')

        # Store to next iteration
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        return H

    def applyFile(self, raw_frame, detections=None):
        line = self.gmcFile.readline()
        tokens = line.split("\t")
        H = np.eye(2, 3, dtype=np.float_)
        H[0, 0] = float(tokens[1])
        H[0, 1] = float(tokens[2])
        H[0, 2] = float(tokens[3])
        H[1, 0] = float(tokens[4])
        H[1, 1] = float(tokens[5])
        H[1, 2] = float(tokens[6])

        return H

    @staticmethod
    def multi_gmc(stracks, H=np.eye(2, 3)):
        """
        GMC module prediction
        :param stracks: List[Strack]
        """
        if len(stracks) > 0:
            multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
            multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])

            R = H[:2, :2]
            R8x8 = np.kron(np.eye(4, dtype=float), R)
            t = H[:2, 2]

            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                mean = R8x8.dot(mean)
                mean[:2] += t
                cov = R8x8.dot(cov).dot(R8x8.transpose())

                stracks[i].kalman_filter.kf.x = mean
                stracks[i].kalman_filter.kf.P = cov
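multi_gmc applies the estimated 2x3 warp to every track's Kalman state; on a single point the transform is simply p' = R p + t. A minimal sketch:

import numpy as np

H = np.array([[1.0, 0.0, 5.0],    # identity rotation/scale
              [0.0, 1.0, -3.0]])  # camera shift of (+5, -3) pixels

R, t = H[:2, :2], H[:2, 2]
point = np.array([100.0, 200.0])  # e.g. a track's (x, y) position
print(R.dot(point) + t)           # [105. 197.]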
327
yolov7-tracker-example/tracker/trackers/deepsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
DeepSORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class DeepSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, h, w)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)


    def reid_preprocess(self, obj_bbox):
        """
        preprocess cropped object bboxes

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, h, w)
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
                                                         matching_thresh=0.9,
                                                         cascade_depth=30,
                                                         tracks=tracklet_pool,
                                                         detections=detections
                                                         )

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with iou'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_appearance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by computing the Kalman state distance, constrained by
        the 0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large cost assigned to infeasible associations
        only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_appearance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements, )
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost
        return cost_matrix


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
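gate_cost_matrix depends on chi2inv95 (imported from .matching, not shown in this diff): the 0.95 quantile of the chi-square distribution per degree of freedom, which for the 4-dimensional gate is about 9.4877. A sketch of the two gates on made-up numbers:

import numpy as np

chi2inv95_4dof = 9.4877                        # 0.95 chi-square quantile, 4 dof
gated_cost = 1e5

cost_matrix = np.array([[0.05, 0.10, 0.30]])   # appearance cost, 1 track x 3 dets
gating_distance = np.array([3.2, 15.0, 8.9])   # squared Mahalanobis distances

cost_matrix[cost_matrix > 0.15] = gated_cost                   # appearance gate
cost_matrix[0, gating_distance > chi2inv95_4dof] = gated_cost  # motion gate
print(cost_matrix)                             # [[5.e-02 1.e+05 1.e+05]]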
@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy.linalg

class BaseKalman:

    def __init__(self,
                 state_dim: int = 8,
                 observation_dim: int = 4,
                 F: np.ndarray = np.zeros((0, )),
                 P: np.ndarray = np.zeros((0, )),
                 Q: np.ndarray = np.zeros((0, )),
                 H: np.ndarray = np.zeros((0, )),
                 R: np.ndarray = np.zeros((0, )),
                 ) -> None:

        self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
        if F.shape[0] > 0: self.kf.F = F  # if valid
        if P.shape[0] > 0: self.kf.P = P
        if Q.shape[0] > 0: self.kf.Q = Q
        if H.shape[0] > 0: self.kf.H = H
        if R.shape[0] > 0: self.kf.R = R

    def initialize(self, observation):
        raise NotImplementedError

    def predict(self, ):
        self.kf.predict()

    def update(self, observation, **kwargs):
        self.kf.update(observation, **kwargs)

    def get_state(self, ):
        return self.kf.x

    def gating_distance(self, measurements, only_position=False):
        """Compute gating distance between state distribution and measurements.
        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.
        Parameters
        ----------
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements; note that the format
            (whether xywh, xyah or another) must be identical to the state definition.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.
        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.
        """

        # map state space to measurement space
        mean = self.kf.x.copy()
        mean = np.dot(self.kf.H, mean)
        covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))

        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
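BaseKalman is a thin wrapper over filterpy's KalmanFilter; the subclasses below supply F and H and build Q/R per step. A minimal constant-velocity toy instantiation (assuming filterpy is installed and base_kalman is importable):

import numpy as np

F = np.array([[1., 1.],
              [0., 1.]])          # 1-D state [x, v]
H = np.array([[1., 0.]])          # observe x only

kf = BaseKalman(state_dim=2, observation_dim=1, F=F, H=H)
kf.kf.x = np.array([0., 1.])      # start at x=0 with velocity 1

kf.predict()
print(kf.get_state())             # roughly [1., 1.]
kf.update(np.array([1.2]))        # correct with a measurement
print(kf.get_state())             # x pulled toward 1.2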
@@ -0,0 +1,99 @@
from .base_kalman import BaseKalman
import numpy as np


class BotKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, w, h, vx, vy, vw, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-w-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[2],  # related to w and h
            2 * self._std_weight_position * observation[3],
            2 * self._std_weight_position * observation[2],
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-w-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

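A minimal usage sketch of the filter lifecycle above (detection values are made up for illustration):

import numpy as np

kalman = BotKalman()
kalman.initialize(np.array([640., 360., 80., 160.]))  # first detection, x-y-w-h

kalman.predict()                                      # propagate to the next frame
kalman.update(np.array([644., 362., 82., 158.]))      # correct with the new detection
print(kalman.kf.x[:4])                                # filtered x-y-w-h estimate
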
@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np


class ByteKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-a-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

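Unlike BotKalman, ByteKalman carries the aspect ratio a = w / h in the state instead of the width, so the width must be recovered after filtering. A small sketch with made-up numbers:

import numpy as np

bkf = ByteKalman()
bkf.initialize(np.array([640., 360., 0.5, 160.]))  # x, y, a = w / h, h

bkf.predict()
bkf.update(np.array([642., 361., 0.5, 162.]))

x, y, a, h = bkf.kf.x[:4]
w = a * h  # recover width from the aspect-ratio state
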
@@ -0,0 +1,144 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy


class OCSORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a format
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # if the predicted area s + vs would be non-positive, zero the area velocity
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def _freeze(self, ):
        """ freeze all the parameters of the Kalman filter

        """
        self.attr_saved = deepcopy(self.kf.__dict__)

    def _unfreeze(self, ):
        """ when a lost object is observed again, replay a virtual trajectory

        """
        if self.attr_saved is not None:
            new_history = deepcopy(self.history_obs)
            self.kf.__dict__ = self.attr_saved

            self.history_obs = self.history_obs[:-1]

            occur = [int(d is None) for d in new_history]
            indices = np.where(np.array(occur) == 0)[0]
            index1 = indices[-2]
            index2 = indices[-1]
            box1 = new_history[index1]
            x1, y1, s1, r1 = box1
            w1 = np.sqrt(s1 * r1)
            h1 = np.sqrt(s1 / r1)
            box2 = new_history[index2]
            x2, y2, s2, r2 = box2
            w2 = np.sqrt(s2 * r2)
            h2 = np.sqrt(s2 / r2)
            time_gap = index2 - index1
            dx = (x2 - x1) / time_gap
            dy = (y2 - y1) / time_gap
            dw = (w2 - w1) / time_gap
            dh = (h2 - h1) / time_gap
            for i in range(index2 - index1):
                """
                The default virtual trajectory generation is by linear
                motion (constant speed hypothesis); you could modify this
                part to implement your own.
                """
                x = x1 + (i + 1) * dx
                y = y1 + (i + 1) * dy
                w = w1 + (i + 1) * dw
                h = h1 + (i + 1) * dh
                s = w * h
                r = w / float(h)
                new_box = np.array([x, y, s, r]).reshape((4, 1))
                """
                The predict-update loop is still used here to refresh the parameters,
                but this can be made faster by directly modifying the internal
                parameters, as suggested in the paper. The naive but slow way is
                kept for easy reading and understanding.
                """
                self.kf.update(new_box)
                if not i == (index2 - index1 - 1):
                    self.kf.predict()

    def update(self, z):
        """ update step

        For simplicity, modify self.kf directly, since OC-SORT changes the intrinsic Kalman filter

        Args:
            z: observation x-y-s-a format
        """

        self.history_obs.append(z)

        if z is None:
            if self.observed:
                self._freeze()
            self.observed = False

            self.kf.update(z)

        else:
            if not self.observed:  # got an observation again; use online smoothing to re-update parameters
                self._unfreeze()

            self.kf.update(z)

            self.observed = True

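A sketch of the freeze/unfreeze behavior above: during occlusion the tracker feeds None, and the first real observation afterwards triggers the virtual-trajectory replay (box values are made up):

import numpy as np

ockf = OCSORTKalman()
ockf.initialize(np.array([100., 100., 400., 0.5]))  # x, y, s = w * h, a = w / h
ockf.update(np.array([100., 100., 400., 0.5]))

for _ in range(3):   # occluded frames: no detection
    ockf.predict()
    ockf.update(None)  # _freeze() fires on the first miss

ockf.predict()
ockf.update(np.array([130., 112., 410., 0.5]))  # re-observed: _unfreeze() replays a linear virtual trajectory
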
@@ -0,0 +1,73 @@
from .base_kalman import BaseKalman
import numpy as np


class SORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a format
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # if the predicted area s + vs would be non-positive, zero the area velocity
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def update(self, z):
        """ update step

        For simplicity, modify self.kf directly, as in the OC-SORT implementation

        Args:
            z: observation x-y-s-a format
        """

        self.kf.update(z)

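SORT-style filters observe (center x, center y, area, aspect ratio); a hedged conversion from the corner format used elsewhere in the trackers (the helper name is illustrative, not part of the repo's API):

import numpy as np

def tlbr_to_xysr(tlbr):
    # (min x, min y, max x, max y) -> (center x, center y, w * h, w / h)
    x1, y1, x2, y2 = np.asarray(tlbr, dtype=float)
    w, h = x2 - x1, y2 - y1
    return np.array([x1 + w / 2, y1 + h / 2, w * h, w / h])

print(tlbr_to_xysr([100, 50, 180, 210]))  # -> [140. 130. 12800. 0.5]
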
@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np


class NSAKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z, score):
        """ update step

        Args:
            z: observation x-y-a-h format
            score: the detection score/confidence required by the NSA Kalman filter

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        # NSA: scale the measurement noise by (1 - detection confidence)
        std = [(1. - score) * x for x in std]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

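The NSA step above scales the measurement standard deviations by (1 - score), so confident detections shrink R and pull the state harder toward the observation. A tiny numeric illustration:

base_std = 8.0                      # nominal position std for a 160-px-tall box (160 / 20)
for score in (0.3, 0.6, 0.9):
    print(score, ((1. - score) * base_std) ** 2)  # R entry shrinks as confidence grows
# 0.3 -> 31.36, 0.6 -> 10.24, 0.9 -> 0.64
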
@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np


class UCMCKalman(BaseKalman):
    def __init__(self, ):

        state_dim = 8
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160
388
yolov7-tracker-example/tracker/trackers/matching.py
Normal file
@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time

chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


def merge_matches(m1, m2, shape):
    O, P, Q = shape
    m1 = np.asarray(m1)
    m2 = np.asarray(m2)

    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))

    mask = M1 * M2
    match = mask.nonzero()
    match = list(zip(match[0], match[1]))
    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))

    return match, unmatched_O, unmatched_Q


def _indices_to_matches(cost_matrix, indices, thresh):
    matched_cost = cost_matrix[tuple(zip(*indices))]
    matched_mask = (matched_cost <= thresh)

    matches = indices[matched_mask]
    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))

    return matches, unmatched_a, unmatched_b


def linear_assignment(cost_matrix, thresh):
    if cost_matrix.size == 0:
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
    matches, unmatched_a, unmatched_b = [], [], []
    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    for ix, mx in enumerate(x):
        if mx >= 0:
            matches.append([ix, mx])
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    matches = np.asarray(matches)
    return matches, unmatched_a, unmatched_b
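linear_assignment wraps lap.lapjv with a cost limit, so pairs more expensive than thresh stay unmatched; a small self-contained check:

import numpy as np

cost = np.array([[0.2, 0.9],
                 [0.8, 0.3],
                 [0.6, 0.7]])
matches, u_rows, u_cols = linear_assignment(cost, thresh=0.5)
print(matches)  # [[0 0] [1 1]] -- row 2 exceeds the 0.5 limit everywhere
print(u_rows)   # [2]
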
def ious(atlbrs, btlbrs):
    """
    Compute cost based on IoU
    :type atlbrs: list[tlbr] | np.ndarray
    :type btlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray
    """
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if ious.size == 0:
        return ious

    ious = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float64),
        np.ascontiguousarray(btlbrs, dtype=np.float64)
    )

    return ious


def iou_distance(atracks, btracks):
    """
    Compute cost based on IoU
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlbr for track in atracks]
        btlbrs = [track.tlbr for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix


def v_iou_distance(atracks, btracks):
    """
    Compute cost based on IoU of the predicted boxes
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
        btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix
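Both distance helpers return 1 - IoU, so identical boxes cost 0; a quick check with raw tlbr arrays (which takes the ndarray path and skips the track attributes):

import numpy as np

a = [np.array([0., 0., 10., 10.])]
b = [np.array([0., 0., 10., 10.]), np.array([20., 20., 30., 30.])]
print(iou_distance(a, b))  # [[0. 1.]] -- perfect overlap, then no overlap
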
def embedding_distance(tracks, detections, metric='cosine'):
    """
    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param metric:
    :return: cost_matrix np.ndarray
    """

    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float64)
    # for i, track in enumerate(tracks):
    #     cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1, -1), det_features, metric))
    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float64)
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
    return cost_matrix


def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, metric='maha')
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
    return cost_matrix
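fuse_motion blends appearance cost with Mahalanobis motion cost as lambda * c_app + (1 - lambda) * d_maha and hard-gates improbable pairs at the chi-square threshold. A sketch of how a tracker would typically chain these calls (the track/detection objects are assumed to carry the attributes used above):

# appearance cost from Re-ID features, then motion fusion and gating
cost = embedding_distance(tracks, detections, metric='cosine')
cost = fuse_motion(kf, cost, tracks, detections, only_position=False, lambda_=0.98)
matches, u_track, u_det = linear_assignment(cost, thresh=0.7)
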
def fuse_iou(cost_matrix, tracks, detections):
    if cost_matrix.size == 0:
        return cost_matrix
    reid_sim = 1 - cost_matrix
    iou_dist = iou_distance(tracks, detections)
    iou_sim = 1 - iou_dist
    fuse_sim = reid_sim * (1 + iou_sim) / 2
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    # fuse_sim = fuse_sim * (1 + det_scores) / 2
    fuse_cost = 1 - fuse_sim
    return fuse_cost


def fuse_score(cost_matrix, detections):
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    fuse_sim = iou_sim * det_scores
    fuse_cost = 1 - fuse_sim
    return fuse_cost


def greedy_assignment_iou(dist, thresh):
    matched_indices = []
    if dist.shape[1] == 0:
        return np.array(matched_indices, np.int32).reshape(-1, 2)
    for i in range(dist.shape[0]):
        j = dist[i].argmin()
        if dist[i][j] < thresh:
            dist[:, j] = 1.
            matched_indices.append([j, i])
    return np.array(matched_indices, np.int32).reshape(-1, 2)


def greedy_assignment(dists, threshs):
    matches = greedy_assignment_iou(dists.T, threshs)
    u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
    u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
    return matches, u_track, u_det


def fuse_score_matrix(cost_matrix, detections, tracks):
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix

    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    trk_scores = np.array([trk.score for trk in tracks])
    trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
    mid_scores = (det_scores + trk_scores) / 2
    fuse_sim = iou_sim * mid_scores
    fuse_cost = 1 - fuse_sim

    return fuse_cost
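fuse_score discounts IoU similarity by detection confidence (fuse_cost = 1 - iou_sim * score); a quick numeric check with a minimal stand-in detection class:

import numpy as np

class _Det:  # minimal stand-in exposing only .score
    def __init__(self, score):
        self.score = score

iou_cost = np.array([[0.2, 0.2]])  # same IoU for both detections
print(fuse_score(iou_cost, [_Det(0.9), _Det(0.4)]))
# [[0.28 0.68]] -- the low-confidence detection becomes much more expensive
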
"""
|
||||
calculate buffered IoU, used in C_BIoU_Tracker
|
||||
"""
|
||||
def buffered_iou_distance(atracks, btracks, level=1):
|
||||
"""
|
||||
atracks: list[C_BIoUSTrack], tracks
|
||||
btracks: list[C_BIoUSTrack], detections
|
||||
level: cascade level, 1 or 2
|
||||
"""
|
||||
assert level in [1, 2], 'level must be 1 or 2'
|
||||
if level == 1: # use motion_state1(tracks) and buffer_bbox1(detections) to calculate
|
||||
atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
|
||||
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
|
||||
else:
|
||||
atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
|
||||
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
|
||||
_ious = ious(atlbrs, btlbrs)
|
||||
|
||||
cost_matrix = 1 - _ious
|
||||
return cost_matrix
|
||||
|
||||
"""
|
||||
observation centric association, with velocity, for OC Sort
|
||||
"""
|
||||
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):
|
||||
|
||||
if(len(tracklets) == 0):
|
||||
return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))
|
||||
|
||||
# get numpy format bboxes
|
||||
trk_tlbrs = np.array([track.tlbr for track in tracklets])
|
||||
det_tlbrs = np.array([det.tlbr for det in detections])
|
||||
det_scores = np.array([det.score for det in detections])
|
||||
|
||||
iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)
|
||||
|
||||
Y, X = speed_direction_batch(det_tlbrs, previous_obs)
|
||||
inertia_Y, inertia_X = velocities[:,0], velocities[:,1]
|
||||
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
|
||||
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
|
||||
diff_angle_cos = inertia_X * X + inertia_Y * Y
|
||||
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
|
||||
diff_angle = np.arccos(diff_angle_cos)
|
||||
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
|
||||
|
||||
valid_mask = np.ones(previous_obs.shape[0])
|
||||
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
|
||||
|
||||
scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
|
||||
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
|
||||
|
||||
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
|
||||
angle_diff_cost = angle_diff_cost * scores.T
|
||||
|
||||
matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9)
|
||||
|
||||
|
||||
return matches, unmatched_a, unmatched_b
|
||||
|
||||
"""
|
||||
helper func of observation_centric_association
|
||||
"""
|
||||
def speed_direction_batch(dets, tracks):
|
||||
tracks = tracks[..., np.newaxis]
|
||||
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0
|
||||
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
|
||||
dx = CX2 - CX1
|
||||
dy = CY2 - CY1
|
||||
norm = np.sqrt(dx**2 + dy**2) + 1e-6
|
||||
dx = dx / norm
|
||||
dy = dy / norm
|
||||
return dy, dx # size: num_track x num_det
|
||||
|
||||
|
||||
def matching_cascade(
|
||||
distance_metric, matching_thresh, cascade_depth, tracks, detections,
|
||||
track_indices=None, detection_indices=None):
|
||||
"""
|
||||
Run matching cascade in DeepSORT
|
||||
|
||||
distance_metirc: function that calculate the cost matrix
|
||||
matching_thresh: float, Associations with cost larger than this value are disregarded.
|
||||
cascade_path: int, equal to max_age of a tracklet
|
||||
tracks: List[STrack], current tracks
|
||||
detections: List[STrack], current detections
|
||||
track_indices: List[int], tracks that will be calculated, Default None
|
||||
detection_indices: List[int], detections that will be calculated, Default None
|
||||
|
||||
return:
|
||||
matched pair, unmatched tracks, unmatced detections: List[int], List[int], List[int]
|
||||
"""
|
||||
if track_indices is None:
|
||||
track_indices = list(range(len(tracks)))
|
||||
if detection_indices is None:
|
||||
detection_indices = list(range(len(detections)))
|
||||
|
||||
detections_to_match = detection_indices
|
||||
matches = []
|
||||
|
||||
for level in range(cascade_depth):
|
||||
"""
|
||||
match new track with detection firstly
|
||||
"""
|
||||
if not len(detections_to_match): # No detections left
|
||||
break
|
||||
|
||||
track_indices_l = [
|
||||
k for k in track_indices
|
||||
if tracks[k].time_since_update == 1 + level
|
||||
] # filter tracks whose age is equal to level + 1 (The age of Newest track = 1)
|
||||
|
||||
if not len(track_indices_l): # Nothing to match at this level
|
||||
continue
|
||||
|
||||
# tracks and detections which will be mathcted in current level
|
||||
track_l = [tracks[idx] for idx in track_indices_l] # List[STrack]
|
||||
det_l = [detections[idx] for idx in detections_to_match] # List[STrack]
|
||||
|
||||
# calculate the cost matrix
|
||||
cost_matrix = distance_metric(track_l, det_l)
|
||||
|
||||
# solve the linear assignment problem
|
||||
matched_row_col, umatched_row, umatched_col = \
|
||||
linear_assignment(cost_matrix, matching_thresh)
|
||||
|
||||
for row, col in matched_row_col: # for those who matched
|
||||
matches.append((track_indices_l[row], detections_to_match[col]))
|
||||
|
||||
umatched_detecion_l = [] # current detections not matched
|
||||
for col in umatched_col: # for detections not matched
|
||||
umatched_detecion_l.append(detections_to_match[col])
|
||||
|
||||
detections_to_match = umatched_detecion_l # update detections to match for next level
|
||||
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
|
||||
|
||||
return matches, unmatched_tracks, detections_to_match
|
||||
|
||||
def nearest_embedding_distance(tracks, detections, metric='cosine'):
|
||||
"""
|
||||
different from embedding distance, this func calculate the
|
||||
nearest distance among all track history features and detections
|
||||
|
||||
tracks: list[STrack]
|
||||
detections: list[STrack]
|
||||
metric: str, cosine or euclidean
|
||||
TODO: support euclidean distance
|
||||
|
||||
return:
|
||||
cost_matrix, np.ndarray, shape(len(tracks), len(detections))
|
||||
"""
|
||||
cost_matrix = np.zeros((len(tracks), len(detections)))
|
||||
det_features = np.asarray([det.features[-1] for det in detections])
|
||||
|
||||
for row, track in enumerate(tracks):
|
||||
track_history_features = np.asarray(track.features)
|
||||
dist = 1. - cal_cosine_distance(track_history_features, det_features)
|
||||
dist = dist.min(axis=0)
|
||||
cost_matrix[row, :] = dist
|
||||
|
||||
return cost_matrix
|
||||
|
||||
def cal_cosine_distance(mat1, mat2):
|
||||
"""
|
||||
simple func to calculate cosine distance between 2 matrixs
|
||||
|
||||
:param mat1: np.ndarray, shape(M, dim)
|
||||
:param mat2: np.ndarray, shape(N, dim)
|
||||
:return: np.ndarray, shape(M, N)
|
||||
"""
|
||||
# result = mat1·mat2^T / |mat1|·|mat2|
|
||||
# norm mat1 and mat2
|
||||
mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
|
||||
mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)
|
||||
|
||||
return np.dot(mat1, mat2.T)
|
||||
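Note that despite its name, cal_cosine_distance returns cosine similarity, which is why callers take 1 - ...; a quick check with unit-length rows:

import numpy as np

a = np.array([[1., 0.], [0., 1.]])
b = np.array([[1., 0.]])
print(cal_cosine_distance(a, b))       # [[1.] [0.]] -- similarity, not distance
print(1. - cal_cosine_distance(a, b))  # [[0.] [1.]] -- what the matchers actually use
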
237
yolov7-tracker-example/tracker/trackers/ocsort_tracker.py
Normal file
@@ -0,0 +1,237 @@
"""
OC-SORT
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_velocity
from .matching import *

from cython_bbox import bbox_overlaps as bbox_ious

class OCSortTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.delta_t = 3

    @staticmethod
    def k_previous_obs(observations, cur_age, k):
        if len(observations) == 0:
            return [-1, -1, -1, -1, -1]
        for i in range(k):
            dt = k - i
            if cur_age - dt in observations:
                return observations[cur_age - dt]
        max_age = max(observations.keys())
        return observations[max_age]

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to the original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, Observation-Centric Momentum'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        velocities = np.array(
            [trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])

        # last observation, observation-centric
        # last_boxes = np.array([trk.last_observation for trk in tracklet_pool])

        # historical observations
        k_observations = np.array(
            [self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])

        # Predict the current location with the Kalman filter
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Observation-centric cost matrix and assignment
        matches, u_track, u_detection = observation_centric_association(
            tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
            velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
        )

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]

        # for tracks unmatched in the first round, use the last observation
        r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detections_second_bbox = [det.tlbr for det in detections_second]

        dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)

        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
    resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
    return resa, resb
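A hedged sketch of driving the tracker above frame by frame; the argument names mirror the attributes __init__ reads (conf_thresh, track_buffer, kalman_format), but the detector and the exact kalman_format value are assumptions:

from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(conf_thresh=0.5, track_buffer=30, kalman_format='ocsort')
tracker = OCSortTracker(args, frame_rate=30)

for frame in video_frames:              # hypothetical frame source
    dets = detector(frame)              # hypothetical: (N, 6) array, tlbr + score + class
    tracks = tracker.update(dets, frame, frame)
    for t in tracks:
        print(t.track_id, t.tlbr, t.score)
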
@@ -0,0 +1,98 @@
"""
AFLink code in StrongSORT (StrongSORT: Make DeepSORT Great Again, arXiv)

copied from the original repo
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms


class TemporalBlock(nn.Module):
    def __init__(self, cin, cout):
        super(TemporalBlock, self).__init__()
        self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.bnf = nn.BatchNorm1d(cout)
        self.bnx = nn.BatchNorm1d(cout)
        self.bny = nn.BatchNorm1d(cout)

    def bn(self, x):
        x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
        x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
        x[:, :, :, 2] = self.bny(x[:, :, :, 2])
        return x

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class FusionBlock(nn.Module):
    def __init__(self, cin, cout):
        super(FusionBlock, self).__init__()
        self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
        self.bn = nn.BatchNorm2d(cout)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Classifier(nn.Module):
    def __init__(self, cin):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(cin * 2, cin // 2)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(cin // 2, 2)

    def forward(self, x1, x2):
        x = torch.cat((x1, x2), dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


class PostLinker(nn.Module):
    def __init__(self):
        super(PostLinker, self).__init__()
        self.TemporalModule_1 = nn.Sequential(
            TemporalBlock(1, 32),
            TemporalBlock(32, 64),
            TemporalBlock(64, 128),
            TemporalBlock(128, 256)
        )
        self.TemporalModule_2 = nn.Sequential(
            TemporalBlock(1, 32),
            TemporalBlock(32, 64),
            TemporalBlock(64, 128),
            TemporalBlock(128, 256)
        )
        self.FusionBlock_1 = FusionBlock(256, 256)
        self.FusionBlock_2 = FusionBlock(256, 256)
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = Classifier(256)

    def forward(self, x1, x2):
        x1 = x1[:, :, :, :3]
        x2 = x2[:, :, :, :3]
        x1 = self.TemporalModule_1(x1)  # [B,1,30,3] -> [B,256,6,3]
        x2 = self.TemporalModule_2(x2)
        x1 = self.FusionBlock_1(x1)
        x2 = self.FusionBlock_2(x2)
        x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
        x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
        y = self.classifier(x1, x2)
        if not self.training:
            y = torch.softmax(y, dim=1)
        return y
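A shape sketch of the association head above: each input is a [B, 1, 30, 3] window of (frame, x, y) triples for one tracklet end, and the output is a 2-way link/no-link score. The model here is untrained and used only to show the tensor flow:

import torch

model = PostLinker().eval()
t1 = torch.randn(2, 1, 30, 3)  # trajectory tail of tracklet A: 30 (frame, x, y) rows
t2 = torch.randn(2, 1, 30, 3)  # trajectory head of tracklet B
with torch.no_grad():
    prob = model(t1, t2)       # softmax over {no-link, link} in eval mode
print(prob.shape)              # torch.Size([2, 2])
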
598
yolov7-tracker-example/tracker/trackers/reid_models/OSNet.py
Normal file
@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F

__all__ = [
    'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]

pretrained_urls = {
    'osnet_x1_0':
    'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
    'osnet_x0_75':
    'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
    'osnet_x0_5':
    'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
    'osnet_x0_25':
    'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
    'osnet_ibn_x1_0':
    'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}


##########
# Basic layers
##########
class ConvLayer(nn.Module):
    """Convolution layer (conv + bn + relu)."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        groups=1,
        IN=False
    ):
        super(ConvLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            groups=groups
        )
        if IN:
            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
        else:
            self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Conv1x1(nn.Module):
    """1x1 convolution + bn + relu."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super(Conv1x1, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=stride,
            padding=0,
            bias=False,
            groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Conv1x1Linear(nn.Module):
    """1x1 convolution + bn (w/o non-linearity)."""

    def __init__(self, in_channels, out_channels, stride=1):
        super(Conv1x1Linear, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Conv3x3(nn.Module):
    """3x3 convolution + bn + relu."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super(Conv3x3, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            3,
            stride=stride,
            padding=1,
            bias=False,
            groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class LightConv3x3(nn.Module):
    """Lightweight 3x3 convolution.

    1x1 (linear) + dw 3x3 (nonlinear).
    """

    def __init__(self, in_channels, out_channels):
        super(LightConv3x3, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, 1, stride=1, padding=0, bias=False
        )
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            bias=False,
            groups=out_channels
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
    """A mini-network that generates channel-wise gates conditioned on the input tensor."""

    def __init__(
        self,
        in_channels,
        num_gates=None,
        return_gates=False,
        gate_activation='sigmoid',
        reduction=16,
        layer_norm=False
    ):
        super(ChannelGate, self).__init__()
        if num_gates is None:
            num_gates = in_channels
        self.return_gates = return_gates
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(
            in_channels,
            in_channels // reduction,
            kernel_size=1,
            bias=True,
            padding=0
        )
        self.norm1 = None
        if layer_norm:
            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(
            in_channels // reduction,
            num_gates,
            kernel_size=1,
            bias=True,
            padding=0
        )
        if gate_activation == 'sigmoid':
            self.gate_activation = nn.Sigmoid()
        elif gate_activation == 'relu':
            self.gate_activation = nn.ReLU(inplace=True)
        elif gate_activation == 'linear':
            self.gate_activation = None
        else:
            raise RuntimeError(
                "Unknown gate activation: {}".format(gate_activation)
            )

    def forward(self, x):
        input = x
        x = self.global_avgpool(x)
        x = self.fc1(x)
        if self.norm1 is not None:
            x = self.norm1(x)
        x = self.relu(x)
        x = self.fc2(x)
        if self.gate_activation is not None:
            x = self.gate_activation(x)
        if self.return_gates:
            return x
        return input * x


class OSBlock(nn.Module):
    """Omni-scale feature learning block."""

    def __init__(
        self,
        in_channels,
        out_channels,
        IN=False,
        bottleneck_reduction=4,
        **kwargs
    ):
        super(OSBlock, self).__init__()
        mid_channels = out_channels // bottleneck_reduction
        self.conv1 = Conv1x1(in_channels, mid_channels)
        self.conv2a = LightConv3x3(mid_channels, mid_channels)
        self.conv2b = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.conv2c = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.conv2d = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.gate = ChannelGate(mid_channels)
        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
        self.downsample = None
        if in_channels != out_channels:
            self.downsample = Conv1x1Linear(in_channels, out_channels)
        self.IN = None
        if IN:
            self.IN = nn.InstanceNorm2d(out_channels, affine=True)

    def forward(self, x):
        identity = x
        x1 = self.conv1(x)
        x2a = self.conv2a(x1)
        x2b = self.conv2b(x1)
        x2c = self.conv2c(x1)
        x2d = self.conv2d(x1)
        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
        x3 = self.conv3(x2)
        if self.downsample is not None:
            identity = self.downsample(identity)
        out = x3 + identity
        if self.IN is not None:
            out = self.IN(out)
        return F.relu(out)


##########
# Network architecture
##########
class OSNet(nn.Module):
    """Omni-Scale Network.

    Reference:
        - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
        - Zhou et al. Learning Generalisable Omni-Scale Representations
          for Person Re-Identification. TPAMI, 2021.
    """

    def __init__(
        self,
        num_classes,
        blocks,
        layers,
        channels,
        feature_dim=512,
        loss='softmax',
        IN=False,
        **kwargs
    ):
        super(OSNet, self).__init__()
        num_blocks = len(blocks)
        assert num_blocks == len(layers)
        assert num_blocks == len(channels) - 1
        self.loss = loss
        self.feature_dim = feature_dim

        # convolutional backbone
        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv2 = self._make_layer(
            blocks[0],
            layers[0],
            channels[0],
            channels[1],
            reduce_spatial_size=True,
            IN=IN
        )
        self.conv3 = self._make_layer(
            blocks[1],
            layers[1],
            channels[1],
            channels[2],
            reduce_spatial_size=True
        )
        self.conv4 = self._make_layer(
            blocks[2],
            layers[2],
            channels[2],
            channels[3],
            reduce_spatial_size=False
        )
        self.conv5 = Conv1x1(channels[3], channels[3])
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        # fully connected layer
        self.fc = self._construct_fc_layer(
            self.feature_dim, channels[3], dropout_p=None
        )
        # identity classification layer
        self.classifier = nn.Linear(self.feature_dim, num_classes)

        self._init_params()

    def _make_layer(
        self,
        block,
        layer,
        in_channels,
        out_channels,
        reduce_spatial_size,
        IN=False
    ):
        layers = []

        layers.append(block(in_channels, out_channels, IN=IN))
        for i in range(1, layer):
            layers.append(block(out_channels, out_channels, IN=IN))

        if reduce_spatial_size:
            layers.append(
                nn.Sequential(
                    Conv1x1(out_channels, out_channels),
                    nn.AvgPool2d(2, stride=2)
                )
            )

        return nn.Sequential(*layers)

    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
        if fc_dims is None or fc_dims < 0:
            self.feature_dim = input_dim
            return None

        if isinstance(fc_dims, int):
            fc_dims = [fc_dims]

        layers = []
        for dim in fc_dims:
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU(inplace=True))
            if dropout_p is not None:
                layers.append(nn.Dropout(p=dropout_p))
            input_dim = dim

        self.feature_dim = fc_dims[-1]

        return nn.Sequential(*layers)

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def featuremaps(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        return x

    def forward(self, x, return_featuremaps=False):
        x = self.featuremaps(x)
        if return_featuremaps:
            return x
        v = self.global_avgpool(x)
        v = v.view(v.size(0), -1)
        if self.fc is not None:
            v = self.fc(v)
        if not self.training:
            return v
        y = self.classifier(v)
        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


def init_pretrained_weights(model, key=''):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown
    from collections import OrderedDict

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(
                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
                )
            )
        )
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise
    filename = key + '_imagenet.pth'
    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        gdown.download(pretrained_urls[key], cached_file, quiet=False)

    state_dict = torch.load(cached_file)
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard the 'module.' prefix

        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights from "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(cached_file)
        )
    else:
        print(
            'Successfully loaded imagenet pretrained weights from "{}"'.
            format(cached_file)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )


##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # standard size (width x1.0)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[64, 256, 384, 512],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x1_0')
    return model


def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # medium size (width x0.75)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[48, 192, 288, 384],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_75')
    return model


def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # tiny size (width x0.5)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[32, 128, 192, 256],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_5')
    return model


def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # very tiny size (width x0.25)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[16, 64, 96, 128],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_25')
    return model


def osnet_ibn_x1_0(
    num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
    # standard size (width x1.0) + IBN layer
    # Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[64, 256, 384, 512],
        loss=loss,
        IN=True,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_ibn_x1_0')
    return model
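A hedged sketch of extracting Re-ID embeddings with the factory functions above (random tensors stand in for person crops):

import torch

model = osnet_x0_25(num_classes=1000, pretrained=False, loss='softmax')
model.eval()                          # eval mode makes forward() return features, not logits

crops = torch.randn(8, 3, 256, 128)   # batch of person crops, H x W = 256 x 128
with torch.no_grad():
    feats = model(crops)              # (8, 512) embeddings from the fc head
feats = feats / feats.norm(dim=1, keepdim=True)  # L2-normalize for cosine matching
print(feats.shape)
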
@@ -0,0 +1,3 @@
"""
file for the reid_models folder
"""
@@ -0,0 +1,157 @@
"""
file for the DeepSORT Re-ID model
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms


class BasicBlock(nn.Module):
    def __init__(self, c_in, c_out, is_downsample=False):
        super(BasicBlock, self).__init__()
        self.is_downsample = is_downsample
        if is_downsample:
            self.conv1 = nn.Conv2d(
                c_in, c_out, 3, stride=2, padding=1, bias=False)
        else:
            self.conv1 = nn.Conv2d(
                c_in, c_out, 3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_out)
        self.relu = nn.ReLU(True)
        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(c_out)
        if is_downsample:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
                nn.BatchNorm2d(c_out)
            )
        elif c_in != c_out:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
                nn.BatchNorm2d(c_out)
            )
            self.is_downsample = True

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)
        if self.is_downsample:
            x = self.downsample(x)
        return F.relu(x.add(y), True)


def make_layers(c_in, c_out, repeat_times, is_downsample=False):
    blocks = []
    for i in range(repeat_times):
        if i == 0:
            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
        else:
            blocks += [BasicBlock(c_out, c_out), ]
    return nn.Sequential(*blocks)


class Net(nn.Module):
    def __init__(self, num_classes=751, reid=False):
        super(Net, self).__init__()
        # input: 3 x 128 x 64 (C x H x W)
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # nn.Conv2d(32,32,3,stride=1,padding=1),
            # nn.BatchNorm2d(32),
            # nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, padding=1),
        )
        # 64 x 64 x 32
        self.layer1 = make_layers(64, 64, 2, False)
        # 64 x 64 x 32
        self.layer2 = make_layers(64, 128, 2, True)
        # 128 x 32 x 16
        self.layer3 = make_layers(128, 256, 2, True)
        # 256 x 16 x 8
        self.layer4 = make_layers(256, 512, 2, True)
        # 512 x 8 x 4
        self.avgpool = nn.AvgPool2d((8, 4), 1)
        # 512 x 1 x 1
        self.reid = reid
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # B x 512
        if self.reid:
            x = x.div(x.norm(p=2, dim=1, keepdim=True))  # L2-normalize the embedding
            return x
        # classifier
        x = self.classifier(x)
        return x


class Extractor(object):
    def __init__(self, model_path, use_cuda=True):
        self.net = Net(reid=True)
        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        state_dict = torch.load(model_path, map_location=torch.device(self.device))[
            'net_dict']
        self.net.load_state_dict(state_dict)
        logger = logging.getLogger("root.tracker")
        logger.info("Loading weights from {}... Done!".format(model_path))
        self.net.to(self.device)
        self.size = (64, 128)
        self.norm = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def _preprocess(self, im_crops):
        """
        1. convert to float with scale from 0 to 1
        2. resize to (64, 128) as the Market1501 dataset did
        3. concatenate to a numpy array
        4. convert to torch Tensor
        5. normalize
        """
        def _resize(im, size):
            try:
                return cv2.resize(im.astype(np.float32) / 255., size)
            except cv2.error:
                print('Error: a bbox side length is zero, ', im.shape)
                exit(1)

        im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
            0) for im in im_crops], dim=0).float()
        return im_batch

    def __call__(self, im_crops):
        if isinstance(im_crops, list):
            im_batch = self._preprocess(im_crops)
        else:
            im_batch = im_crops

        with torch.no_grad():
            im_batch = im_batch.to(self.device)
            features = self.net(im_batch)
        return features
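

# --- Usage sketch (editor's note): a minimal, weight-free exercise of the
# backbone above; Extractor itself additionally needs a checkpoint whose
# 'net_dict' entry matches Net (the commented path below is hypothetical).
if __name__ == '__main__':
    net = Net(num_classes=751, reid=True)
    net.eval()
    with torch.no_grad():
        dummy = torch.randn(4, 3, 128, 64)  # (B, C, H, W) crops, as Extractor produces
        emb = net(dummy)
    print(emb.shape)  # torch.Size([4, 512]), rows L2-normalized
    # extractor = Extractor('deepsort_ckpt.t7', use_cuda=False)  # hypothetical checkpoint path
    # feats = extractor([np.zeros((80, 40, 3), dtype=np.uint8)])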
@@ -0,0 +1,273 @@
"""
load checkpoint file
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
"""
from __future__ import division, print_function, absolute_import
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn


__all__ = [
    'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
    'open_all_layers', 'open_specified_layers', 'count_num_param',
    'load_pretrained_weights'
]


def load_checkpoint(fpath):
    r"""Loads checkpoint.

    ``UnicodeDecodeError`` is handled explicitly, so files saved with
    Python 2 can be read from Python 3.

    Args:
        fpath (str): path to checkpoint.

    Returns:
        dict

    Examples::
        >>> from torchreid.utils import load_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> checkpoint = load_checkpoint(fpath)
    """
    if fpath is None:
        raise ValueError('File path is None')
    fpath = osp.abspath(osp.expanduser(fpath))
    if not osp.exists(fpath):
        raise FileNotFoundError('File is not found at "{}"'.format(fpath))
    map_location = None if torch.cuda.is_available() else 'cpu'
    try:
        checkpoint = torch.load(fpath, map_location=map_location)
    except UnicodeDecodeError:
        pickle.load = partial(pickle.load, encoding="latin1")
        pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
        checkpoint = torch.load(
            fpath, pickle_module=pickle, map_location=map_location
        )
    except Exception:
        print('Unable to load checkpoint from "{}"'.format(fpath))
        raise
    return checkpoint


def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
    r"""Resumes training from a checkpoint.

    This will load (1) model weights and (2) the ``state_dict``
    of the optimizer if ``optimizer`` is not None.

    Args:
        fpath (str): path to checkpoint.
        model (nn.Module): model.
        optimizer (Optimizer, optional): an Optimizer.
        scheduler (LRScheduler, optional): an LRScheduler.

    Returns:
        int: start_epoch.

    Examples::
        >>> from torchreid.utils import resume_from_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> start_epoch = resume_from_checkpoint(
        >>>     fpath, model, optimizer, scheduler
        >>> )
    """
    print('Loading checkpoint from "{}"'.format(fpath))
    checkpoint = load_checkpoint(fpath)
    model.load_state_dict(checkpoint['state_dict'])
    print('Loaded model weights')
    if optimizer is not None and 'optimizer' in checkpoint.keys():
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Loaded optimizer')
    if scheduler is not None and 'scheduler' in checkpoint.keys():
        scheduler.load_state_dict(checkpoint['scheduler'])
        print('Loaded scheduler')
    start_epoch = checkpoint['epoch']
    print('Last epoch = {}'.format(start_epoch))
    if 'rank1' in checkpoint.keys():
        print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
    return start_epoch


def adjust_learning_rate(
    optimizer,
    base_lr,
    epoch,
    stepsize=20,
    gamma=0.1,
    linear_decay=False,
    final_lr=0,
    max_epoch=100
):
    r"""Adjusts learning rate.

    Deprecated.
    """
    if linear_decay:
        # linearly decay learning rate from base_lr to final_lr
        frac_done = epoch / max_epoch
        lr = frac_done * final_lr + (1. - frac_done) * base_lr
    else:
        # decay learning rate by gamma for every stepsize epochs
        lr = base_lr * (gamma ** (epoch // stepsize))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

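
# --- Worked example (editor's note): the step-decay branch above yields, for
# base_lr=0.1, gamma=0.1, stepsize=20:
#   epoch  0..19 -> lr = 0.1 * 0.1**0 = 0.1
#   epoch 20..39 -> lr = 0.1 * 0.1**1 = 0.01
#   epoch 40..59 -> lr = 0.1 * 0.1**2 = 0.001
# while the linear branch interpolates from base_lr at epoch 0 to final_lr at
# max_epoch.
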
def set_bn_to_eval(m):
    r"""Sets BatchNorm layers to eval mode."""
    # 1. no update for running mean and var
    # 2. scale and shift parameters are still trainable
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.eval()


def open_all_layers(model):
    r"""Opens all layers in model for training.

    Examples::
        >>> from torchreid.utils import open_all_layers
        >>> open_all_layers(model)
    """
    model.train()
    for p in model.parameters():
        p.requires_grad = True


def open_specified_layers(model, open_layers):
    r"""Opens the specified layers in a model for training while keeping
    all other layers frozen.

    Args:
        model (nn.Module): neural net model.
        open_layers (str or list): layers open for training.

    Examples::
        >>> from torchreid.utils import open_specified_layers
        >>> # Only model.classifier will be updated.
        >>> open_layers = 'classifier'
        >>> open_specified_layers(model, open_layers)
        >>> # Only model.fc and model.classifier will be updated.
        >>> open_layers = ['fc', 'classifier']
        >>> open_specified_layers(model, open_layers)
    """
    if isinstance(model, nn.DataParallel):
        model = model.module

    if isinstance(open_layers, str):
        open_layers = [open_layers]

    for layer in open_layers:
        assert hasattr(
            model, layer
        ), '"{}" is not an attribute of the model, please provide the correct name'.format(
            layer
        )

    for name, module in model.named_children():
        if name in open_layers:
            module.train()
            for p in module.parameters():
                p.requires_grad = True
        else:
            module.eval()
            for p in module.parameters():
                p.requires_grad = False


def count_num_param(model):
    r"""Counts the number of parameters in a model while ignoring ``self.classifier``.

    Args:
        model (nn.Module): network model.

    Examples::
        >>> from torchreid.utils import count_num_param
        >>> model_size = count_num_param(model)

    .. warning::

        This method is deprecated in favor of
        ``torchreid.utils.compute_model_complexity``.
    """
    warnings.warn(
        'This method is deprecated and will be removed in the future.'
    )

    num_param = sum(p.numel() for p in model.parameters())

    if isinstance(model, nn.DataParallel):
        model = model.module

    if hasattr(model,
               'classifier') and isinstance(model.classifier, nn.Module):
        # we ignore the classifier because it is unused at test time
        num_param -= sum(p.numel() for p in model.classifier.parameters())

    return num_param


def load_pretrained_weights(model, weight_path):
    r"""Loads pretrained weights to a model.

    Features::
        - Incompatible layers (unmatched in name or size) will be ignored.
        - Automatically deals with keys containing "module.".

    Args:
        model (nn.Module): network model.
        weight_path (str): path to pretrained weights.

    Examples::
        >>> from torchreid.utils import load_pretrained_weights
        >>> weight_path = 'log/my_model/model-best.pth.tar'
        >>> load_pretrained_weights(model, weight_path)
    """
    checkpoint = load_checkpoint(weight_path)
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard the 'module.' prefix added by DataParallel

        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(weight_path)
        )
    else:
        print(
            'Successfully loaded pretrained weights from "{}"'.
            format(weight_path)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )
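

# --- Usage sketch (editor's note): a small self-contained demo of the partial
# loading above; the toy modules and the temporary file are illustrative only.
if __name__ == '__main__':
    import os
    import tempfile

    src = nn.Linear(4, 2)  # "pretrained" toy model
    tmp = tempfile.NamedTemporaryFile(suffix='.pth', delete=False)
    tmp.close()
    torch.save({'state_dict': src.state_dict()}, tmp.name)
    load_pretrained_weights(nn.Linear(4, 2), tmp.name)  # all keys match -> success message
    load_pretrained_weights(nn.Linear(8, 2), tmp.name)  # 'weight' size differs -> discarded
    os.remove(tmp.name)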
169
yolov7-tracker-example/tracker/trackers/sort_tracker.py
Normal file
169
yolov7-tracker-example/tracker/trackers/sort_tracker.py
Normal file
@@ -0,0 +1,169 @@
"""
SORT
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *


class SortTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format
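
    # --- Editor's note: `args` is expected to be a namespace carrying at least
    # the fields read above: args.conf_thresh (detection confidence threshold),
    # args.track_buffer (frames to keep lost tracks alive), and
    # args.kalman_format (a key of MOTION_MODEL_DICT in tracklet.py,
    # e.g. 'sort' or 'byte').
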
    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = iou_distance(tracklet_pool, detections)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 3: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 4: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
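

# --- Usage sketch (editor's note): a minimal per-frame loop. The args fields
# and the row layout of the detection array (4 box values + score + class,
# indexed as update() does) are assumptions; note the docstring says tlbr while
# Tracklet consumes tlwh, so verify the format against your detector. Assumes
# the sibling modules (tracklet, matching, kalman_filters) are importable.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(conf_thresh=0.5, track_buffer=30, kalman_format='byte')
    tracker = SortTracker(args, frame_rate=30)
    for frame in range(3):
        dets = np.array([[100. + 2 * frame, 100., 40., 80., 0.9, 0.]])
        online = tracker.update(dets, img=None, ori_img=None)
        print(frame, [t.track_id for t in online])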
338
yolov7-tracker-example/tracker/trackers/sparse_tracker.py
Normal file
338
yolov7-tracker-example/tracker/trackers/sparse_tracker.py
Normal file
@@ -0,0 +1,338 @@
"""
SparseTrack
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_depth
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

from .camera_motion_compensation import GMC

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False)
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class SparseTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        # camera motion compensation module
        self.gmc = GMC(method='orb', downscale=2, verbose=None)

    def get_deep_range(self, obj, step):
        # collect the pseudo-depth of every tracklet/detection, split the
        # [min, max] depth interval into `step` bins and return one boolean
        # mask per bin
        col = []
        for t in obj:
            lend = (t.deep_vec)[2]
            col.append(lend)
        col = np.asarray(col)  # as an array so the elementwise masks below work
        max_len, min_len = max(col), min(col)
        if max_len != min_len:
            deep_range = np.arange(min_len, max_len, (max_len - min_len + 1) / step)
            if deep_range[-1] < max_len:
                deep_range = np.concatenate([deep_range, np.array([max_len])])
            deep_range[0] = np.floor(deep_range[0])
            deep_range[-1] = np.ceil(deep_range[-1])
        else:
            deep_range = [min_len, ]
        mask = self.get_sub_mask(deep_range, col)
        return mask

    def get_sub_mask(self, deep_range, col):
        min_len = deep_range[0]
        max_len = deep_range[-1]
        if max_len == min_len:
            lc = min_len
        mask = []
        for d in deep_range:
            if d > deep_range[0] and d < deep_range[-1]:
                mask.append((col >= lc) & (col < d))
                lc = d
            elif d == deep_range[-1]:
                mask.append((col >= lc) & (col <= d))
                lc = d
            else:
                lc = d
                continue
        return mask
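
    # --- Worked example (editor's note): with depths col = [5, 12, 30] and
    # step = 3, get_deep_range builds deep_range = [5.0, 13.67, 22.33, 30.0]
    # (floor/ceil applied at the two ends) and get_sub_mask yields the bins
    #   [T, T, F]  for depth in [5.0, 13.67)
    #   [F, F, F]  for depth in [13.67, 22.33)
    #   [F, F, T]  for depth in [22.33, 30.0]
    # so DCM below associates detections and tracks bin by bin, near to far.
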
    # core function
    def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
        # depth cascade matching: associate detections and tracks level by
        # level, from the nearest depth bin to the farthest; `is_fuse` is
        # kept for interface compatibility but is unused here
        if len(detections) > 0:
            det_mask = self.get_deep_range(detections, levels)
        else:
            det_mask = []

        if len(tracks) != 0:
            track_mask = self.get_deep_range(tracks, levels)
        else:
            track_mask = []

        u_detection, u_tracks, res_det, res_track = [], [], [], []
        if len(track_mask) != 0:
            if len(track_mask) < len(det_mask):
                for i in range(len(det_mask) - len(track_mask)):
                    idx = np.argwhere(det_mask[len(track_mask) + i] == True)
                    for idd in idx:
                        res_det.append(detections[idd[0]])
            elif len(track_mask) > len(det_mask):
                for i in range(len(track_mask) - len(det_mask)):
                    idx = np.argwhere(track_mask[len(det_mask) + i] == True)
                    for idd in idx:
                        res_track.append(tracks[idd[0]])

            for dm, tm in zip(det_mask, track_mask):
                det_idx = np.argwhere(dm == True)
                trk_idx = np.argwhere(tm == True)

                # search det
                det_ = []
                for idd in det_idx:
                    det_.append(detections[idd[0]])
                det_ = det_ + u_detection
                # search trk
                track_ = []
                for idt in trk_idx:
                    track_.append(tracks[idt[0]])
                # update trk
                track_ = track_ + u_tracks

                dists = iou_distance(track_, det_)

                matches, u_track_, u_det_ = linear_assignment(dists, thresh)
                for itracked, idet in matches:
                    track = track_[itracked]
                    det = det_[idet]
                    if track.state == TrackState.Tracked:
                        track.update(det_[idet], self.frame_id)
                        activated_tracklets.append(track)
                    else:
                        track.re_activate(det, self.frame_id, new_id=False)
                        refind_tracklets.append(track)
                u_tracks = [track_[t] for t in u_track_]
                u_detection = [det_[t] for t in u_det_]

            u_tracks = u_tracks + res_track
            u_detection = u_detection + res_det

        else:
            u_detection = detections

        return activated_tracklets, refind_tracklets, u_tracks, u_detection

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes, depth cascade matching'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        # depth cascade matching
        activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
            detections,
            tracklet_pool,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.75,
            is_fuse=True)

        ''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]

        activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
            detections_second,
            r_tracked_tracklets,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.3,
            is_fuse=False)

        for track in u_track:
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = u_detection_high
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
327
yolov7-tracker-example/tracker/trackers/strongsort_tracker.py
Normal file
327
yolov7-tracker-example/tracker/trackers/strongsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
StrongSORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False)
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class StrongSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, 128, 256)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

            self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)

        self.lambda_ = 0.98  # the coefficient of the cost mix in eq. 10 of the paper

    def reid_preprocess(self, obj_bbox):
        """
        preprocess a cropped object bbox

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 256)
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance features of the objects
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # vanilla matching
        cost_matrix = self.gated_metric(tracklet_pool, detections)
        matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with IoU'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets)
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_appearance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by the Kalman state distance, constrained by the
        0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large constant assigned to infeasible associations
        only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_appearance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements)
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost

            cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
        return cost_matrix
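

# --- Worked example (editor's note): the final row update above mixes the two
# costs as lambda * appearance + (1 - lambda) * motion (eq. 10, lambda = 0.98).
# With appearance costs [0.12, 0.10] and Mahalanobis distances [1.0, 9.0]
# (both under the gating threshold chi2inv95[4] ~ 9.49):
#   candidate 1: 0.98 * 0.12 + 0.02 * 1.0 = 0.1376
#   candidate 2: 0.98 * 0.10 + 0.02 * 9.0 = 0.278
# so a slightly better-looking but kinematically implausible match loses.
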
def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
366
yolov7-tracker-example/tracker/trackers/tracklet.py
Normal file
366
yolov7-tracker-example/tracker/trackers/tracklet.py
Normal file
@@ -0,0 +1,366 @@
"""
implements the base elements of a trajectory
"""

import numpy as np
from collections import deque

from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman

MOTION_MODEL_DICT = {
    'sort': SORTKalman,
    'byte': ByteKalman,
    'bot': BotKalman,
    'ocsort': OCSORTKalman,
    'strongsort': NSAKalman,
}

STATE_CONVERT_DICT = {
    'sort': 'xysa',
    'byte': 'xyah',
    'bot': 'xywh',
    'ocsort': 'xysa',
    'strongsort': 'xyah'
}


class Tracklet(BaseTrack):
    def __init__(self, tlwh, score, category, motion='byte'):

        # initial position
        self._tlwh = np.asarray(tlwh, dtype=float)  # np.float is removed in recent NumPy
        self.is_activated = False

        self.score = score
        self.category = category

        # kalman
        self.motion = motion
        self.kalman_filter = MOTION_MODEL_DICT[motion]()

        self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])

        # init kalman
        self.kalman_filter.initialize(self.convert_func(self._tlwh))

    def predict(self):
        self.kalman_filter.predict()
        self.time_since_update += 1

    def activate(self, frame_id):
        self.track_id = self.next_id()

        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        self.time_since_update = 0

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()

    def xyah_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def xywh_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    def xysa_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] = np.sqrt(x[2] * x[3])  # w = sqrt(s * a) since s = w*h, a = w/h
        ret[3] = x[2] / ret[2]         # h = s / w

        ret[:2] -= ret[2:] / 2
        return ret
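
# --- Worked example (editor's note): the tlwh <-> Kalman-state conversions
# round-trip; e.g. for tlwh = [100, 50, 40, 80] with state format 'xysa':
#   BaseTrack.tlwh_to_xysa -> [120, 90, 3200, 0.5]  (center, scale=w*h, aspect=w/h)
# and xysa_to_tlwh recovers w = sqrt(3200 * 0.5) = 40, h = 3200 / 40 = 80.
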
class Tracklet_w_reid(Tracklet):
    """
    Tracklet class with re-id features, for BoT-SORT, DeepSORT, etc.
    """

    def __init__(self, tlwh, score, category, motion='byte',
                 feat=None, feat_history=50):
        super().__init__(tlwh, score, category, motion)

        self.smooth_feat = None  # EMA feature
        self.curr_feat = None  # current feature
        self.features = deque([], maxlen=feat_history)  # all features
        if feat is not None:
            self.update_features(feat)

        self.alpha = 0.9

    def update_features(self, feat):
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
        else:
            self.kalman_filter.update(self.convert_func(new_track.tlwh))

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
        else:
            self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.time_since_update = 0


class Tracklet_w_velocity(Tracklet):
    """
    Tracklet class with velocity direction, for OC-SORT.
    """

    def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
        super().__init__(tlwh, score, category, motion)

        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder
        self.observations = dict()
        self.history_observations = []
        self.velocity = None
        self.delta_t = delta_t

        self.age = 0  # mark the age

    @staticmethod
    def speed_direction(bbox1, bbox2):
        cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
        cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
        speed = np.array([cy2 - cy1, cx2 - cx1])
        norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6
        return speed / norm

    def predict(self):
        self.kalman_filter.predict()

        self.age += 1
        self.time_since_update += 1

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.time_since_update = 0

        # update velocity and history buffer
        new_tlbr = self.tlwh_to_tlbr(new_tlwh)

        if self.last_observation.sum() >= 0:  # there is a previous observation
            previous_box = None
            for i in range(self.delta_t):
                dt = self.delta_t - i
                if self.age - dt in self.observations:
                    previous_box = self.observations[self.age - dt]
                    break
            if previous_box is None:
                previous_box = self.last_observation
            """
            Estimate the track speed direction with observations \Delta t steps away
            """
            self.velocity = self.speed_direction(previous_box, new_tlbr)

        new_observation = np.r_[new_tlbr, new_track.score]
        self.last_observation = new_observation
        self.observations[self.age] = new_observation
        self.history_observations.append(new_observation)


class Tracklet_w_bbox_buffer(Tracklet):
    """
    Tracklet class with a buffer of bboxes, for C-BIoU track.
    """
    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

        # params of the motion state
        self.b1, self.b2, self.n = 0.3, 0.5, 5
        self.origin_bbox_buffer = deque()  # stores the original bboxes (tlwh) from t - self.n to t, where t is the last detected time
        self.origin_bbox_buffer.append(self._tlwh)
        # buffered bbox, two buffer sizes
        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        # motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def get_buffer_bbox(self, level=1, bbox=None):
        """
        get the buffered bbox as: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
        level = 1: b = self.b1; level = 2: b = self.b2
        bbox: if not None, use bbox to calculate the buffered bbox, else use self._tlwh
        """
        assert level in [1, 2], 'level must be 1 or 2'

        b = self.b1 if level == 1 else self.b2

        if bbox is None:
            buffer_bbox = self._tlwh + np.array([-b * self._tlwh[2], -b * self._tlwh[3], 2 * b * self._tlwh[2], 2 * b * self._tlwh[3]])
        else:
            buffer_bbox = bbox + np.array([-b * bbox[2], -b * bbox[3], 2 * b * bbox[2], 2 * b * bbox[3]])
        return np.maximum(0.0, buffer_bbox)

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

        self._tlwh = new_track._tlwh
        # update the stored bboxes
        if len(self.origin_bbox_buffer) > self.n:
            self.origin_bbox_buffer.popleft()
        self.origin_bbox_buffer.append(self._tlwh)

        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        # update the stored bboxes
        if len(self.origin_bbox_buffer) > self.n:
            self.origin_bbox_buffer.popleft()
        self.origin_bbox_buffer.append(new_tlwh)

        # update the motion state; note this must read time_since_update
        # *before* it is reset, otherwise the extrapolation branch is dead code
        if self.time_since_update:  # there were some unmatched frames
            if len(self.origin_bbox_buffer) < self.n:
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
            else:  # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
                motion_state = self.origin_bbox_buffer[-1] + \
                    (self.time_since_update / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)

        else:  # no unmatched frames, use the current detection as the motion state
            self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
            self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)

        self.time_since_update = 0
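
# --- Worked example (editor's note): for a box tlwh = [100, 100, 40, 80] and
# b1 = 0.3, get_buffer_bbox(level=1) expands it to
#   [100 - 12, 100 - 24, 40 + 24, 80 + 48] = [88, 76, 64, 128],
# i.e. the same center with each side scaled by (1 + 2*b), which is what makes
# the buffered IoU tolerant to fast motion.
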
class Tracklet_w_depth(Tracklet):
    """
    tracklet with pseudo-depth info (i.e., 2000 - y2), for SparseTrack
    """

    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

    @property
    # @jit(nopython=True)
    def deep_vec(self):
        """Return the pseudo-depth vector `(center x, bottom y, depth)`,
        where depth = 2000 - y2.
        """
        ret = self.tlwh.copy()
        cx = ret[0] + 0.5 * ret[2]
        y2 = ret[1] + ret[3]
        length = 2000 - y2
        return np.asarray([cx, y2, length], dtype=float)  # np.float is removed in recent NumPy