Mirror of https://github.com/NohamR/Stage-2024.git (synced 2026-01-11 16:58:23 +00:00)
133
yolov7-tracker-example/tracker/trackers/basetrack.py
Normal file
@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict


class TrackState(object):
    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack(object):
    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        return self.frame_id

    @staticmethod
    def next_id():
        BaseTrack._count += 1
        return BaseTrack._count

    def activate(self, *args):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        raise NotImplementedError

    def mark_lost(self):
        self.state = TrackState.Lost

    def mark_removed(self):
        self.state = TrackState.Removed

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @property
    def xywh(self):
        """Convert bounding box to format `(center x, center y, width, height)`."""
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2.0
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret

    @staticmethod
    def tlwh_to_xysa(tlwh):
        """Convert bounding box to format `(center x, center y, scale (area),
        aspect ratio)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] = tlwh[2] * tlwh[3]
        ret[3] = tlwh[2] / tlwh[3]
        return ret

    def to_xyah(self):
        return self.tlwh_to_xyah(self.tlwh)

    def to_xywh(self):
        return self.tlwh_to_xywh(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    # @jit(nopython=True)
    def tlwh_to_tlbr(tlwh):
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
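The conversion helpers above are plain NumPy functions and can be sanity-checked in isolation. A minimal sketch with illustrative values (assuming basetrack.py is importable from your working directory):

import numpy as np
from basetrack import BaseTrack  # adjust the import path to where basetrack.py lives

tlwh = np.array([10., 20., 100., 50.])   # top-left (10, 20), 100x50 box

print(BaseTrack.tlwh_to_tlbr(tlwh))      # [ 10.  20. 110.  70.]  corner format
print(BaseTrack.tlwh_to_xyah(tlwh))      # [60. 45.  2. 50.]  center, aspect (w/h), height
print(BaseTrack.tlwh_to_xysa(tlwh))      # [  60.   45. 5000.    2.]  center, area, aspect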
329
yolov7-tracker-example/tracker/trackers/botsort_tracker.py
Normal file
@@ -0,0 +1,329 @@
"""
BoT-SORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

from .camera_motion_compensation import GMC

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model

class BotTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, h, w)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        # camera motion compensation module
        self.gmc = GMC(method='orb', downscale=2, verbose=None)

    def reid_preprocess(self, obj_bbox):
        """
        preprocess cropped object bboxes

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 128)
        """
        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128))  # shape: (128, 128, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))
            # if any(tlbr_ == -1 for tlbr_ in tlwh):
            #     print(tlwh)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()


    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        """Step 1: Extract reid features"""
        if self.with_reid:
            features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            if self.with_reid:
                detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                              (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
            else:
                detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                              (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        ious_dists = iou_distance(tracklet_pool, detections)
        ious_dists_mask = (ious_dists > 0.5)  # high-confidence iou

        if self.with_reid:
            # mixed cost matrix
            emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)

        else:
            dists = ious_dists

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        ious_dists = iou_distance(unconfirmed, detections)
        ious_dists_mask = (ious_dists > 0.5)

        if self.with_reid:
            emb_dists = embedding_distance(unconfirmed, detections) / 2.0
            raw_emb_dists = emb_dists.copy()
            emb_dists[emb_dists > 0.25] = 1.0
            emb_dists[ious_dists_mask] = 1.0
            dists = np.minimum(ious_dists, emb_dists)
        else:
            dists = ious_dists

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
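The cost fusion used twice inside BotTracker.update() — threshold the embedding cost, mask it wherever spatial overlap is poor, then take the element-wise minimum with the IoU cost — can be exercised standalone. A minimal sketch with made-up cost matrices:

import numpy as np

ious_dists = np.array([[0.2, 0.8],
                       [0.6, 0.3]])       # 1 - IoU per track/detection pair
emb_dists = np.array([[0.10, 0.40],
                      [0.30, 0.05]])      # halved cosine distances

ious_dists_mask = ious_dists > 0.5        # pairs with poor spatial overlap
emb_dists[emb_dists > 0.25] = 1.0         # discard weak appearance matches
emb_dists[ious_dists_mask] = 1.0          # never trust appearance without overlap
print(np.minimum(ious_dists, emb_dists))  # [[0.1  0.8 ]
                                          #  [0.6  0.05]]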
201
yolov7-tracker-example/tracker/trackers/byte_tracker.py
Normal file
@@ -0,0 +1,201 @@
"""
ByteTrack
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *

class ByteTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = iou_distance(tracklet_pool, detections)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        dists = iou_distance(r_tracked_tracklets, detections_second)
        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
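The score split at the top of ByteTracker.update() is the core of ByteTrack: detections above conf_thresh drive the first association, detections between 0.1 and conf_thresh drive the second, and everything below 0.1 is discarded. A standalone sketch, assuming conf_thresh = 0.5:

import numpy as np

conf_thresh = 0.5
scores = np.array([0.9, 0.45, 0.05, 0.6, 0.2])

remain_inds = scores > conf_thresh
inds_second = np.logical_and(scores > 0.1, scores < conf_thresh)

print(np.flatnonzero(remain_inds))   # [0 3] -> first association
print(np.flatnonzero(inds_second))   # [1 4] -> second association (0.05 is dropped)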
204
yolov7-tracker-example/tracker/trackers/c_biou_tracker.py
Normal file
@@ -0,0 +1,204 @@
"""
C-BIoU Tracker
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *

class C_BIoUTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = buffered_iou_distance(tracklet_pool, detections, level=1)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]

        dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)

        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = buffered_iou_distance(unconfirmed, detections, level=1)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
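buffered_iou_distance is imported from .matching and its implementation is not part of this diff. To illustrate the buffered-IoU idea behind C-BIoU (inflate both boxes by a buffer ratio before computing IoU, so nearby but non-overlapping boxes can still be matched), here is a hypothetical standalone sketch — not the project's implementation:

import numpy as np

def buffered_iou(tlwh_a, tlwh_b, b=0.3):
    # Hypothetical illustration: expand each tlwh box by buffer ratio b on every side.
    def expand(t):
        x, y, w, h = np.asarray(t, dtype=float)
        return np.array([x - b * w, y - b * h, w * (1 + 2 * b), h * (1 + 2 * b)])
    a, c = expand(tlwh_a), expand(tlwh_b)
    x1, y1 = max(a[0], c[0]), max(a[1], c[1])
    x2 = min(a[0] + a[2], c[0] + c[2])
    y2 = min(a[1] + a[3], c[1] + c[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    return inter / (a[2] * a[3] + c[2] * c[3] - inter)

print(buffered_iou([0, 0, 10, 10], [12, 0, 10, 10]))  # ~0.14: positive despite no raw overlap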
264
yolov7-tracker-example/tracker/trackers/camera_motion_compensation.py (file name inferred from the `from .camera_motion_compensation import GMC` import above)
Normal file
@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt

"""GMC Module"""
class GMC:
    def __init__(self, method='orb', downscale=2, verbose=None):
        super(GMC, self).__init__()

        self.method = method
        self.downscale = max(1, int(downscale))

        if self.method == 'orb':
            self.detector = cv2.FastFeatureDetector_create(20)
            self.extractor = cv2.ORB_create()
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        elif self.method == 'sift':
            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

        elif self.method == 'ecc':
            number_of_iterations = 100
            termination_eps = 1e-5
            self.warp_mode = cv2.MOTION_EUCLIDEAN
            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

        elif self.method == 'file' or self.method == 'files':
            seqName = verbose[0]
            ablation = verbose[1]
            if ablation:
                filePath = r'tracker/GMC_files/MOT17_ablation'
            else:
                filePath = r'tracker/GMC_files/MOTChallenge'

            if '-FRCNN' in seqName:
                seqName = seqName[:-6]
            elif '-DPM' in seqName:
                seqName = seqName[:-4]
            elif '-SDP' in seqName:
                seqName = seqName[:-4]

            self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')

            if self.gmcFile is None:
                raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
        elif self.method == 'none' or self.method == 'None':
            self.method = 'none'
        else:
            raise ValueError("Error: Unknown CMC method:" + method)

        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None

        self.initializedFirstFrame = False

    def apply(self, raw_frame, detections=None):
        if self.method == 'orb' or self.method == 'sift':
            return self.applyFeatures(raw_frame, detections)
        elif self.method == 'ecc':
            return self.applyEcc(raw_frame, detections)
        elif self.method == 'file':
            return self.applyFile(raw_frame, detections)
        elif self.method == 'none':
            return np.eye(2, 3)
        else:
            return np.eye(2, 3)

    def applyEcc(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3, dtype=np.float32)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Run the ECC algorithm. The results are stored in warp_matrix.
        # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
        try:
            (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
        except:
            print('Warning: find transform failed. Set warp as identity')

        return H

    def applyFeatures(self, raw_frame, detections=None):

        # Initialize
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3)

        # Downscale image (TODO: consider using pyramids)
        if self.downscale > 1.0:
            # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # find the keypoints
        mask = np.zeros_like(frame)
        # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
        mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
        if detections is not None:
            for det in detections:
                tlbr = (det[:4] / self.downscale).astype(np.int_)
                mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0

        keypoints = self.detector.detect(frame, mask)

        # compute the descriptors
        keypoints, descriptors = self.extractor.compute(frame, keypoints)

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Match descriptors.
        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

        # Filter matches based on smallest spatial distance
        matches = []
        spatialDistances = []

        maxSpatialDistance = 0.25 * np.array([width, height])

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            return H

        for m, n in knnMatches:
            if m.distance < 0.9 * n.distance:
                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
                                   prevKeyPointLocation[1] - currKeyPointLocation[1])

                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
                        (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
                    spatialDistances.append(spatialDistance)
                    matches.append(m)

        meanSpatialDistances = np.mean(spatialDistances, 0)
        stdSpatialDistances = np.std(spatialDistances, 0)

        inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # Draw the keypoint matches on the output image
        if 0:
            matches_img = np.hstack((self.prevFrame, frame))
            matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
            W = np.size(self.prevFrame, 1)
            for m in goodMatches:
                prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
                curr_pt[0] += W
                color = np.random.randint(0, 255, (3,))
                color = (int(color[0]), int(color[1]), int(color[2]))

                matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
                matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
                matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)

            plt.figure()
            plt.imshow(matches_img)
            plt.show()

        # Find rigid matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

            # Handle downscale
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            print('Warning: not enough matching points')

        # Store to next iteration
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        return H

    def applyFile(self, raw_frame, detections=None):
        line = self.gmcFile.readline()
        tokens = line.split("\t")
        H = np.eye(2, 3, dtype=np.float_)
        H[0, 0] = float(tokens[1])
        H[0, 1] = float(tokens[2])
        H[0, 2] = float(tokens[3])
        H[1, 0] = float(tokens[4])
        H[1, 1] = float(tokens[5])
        H[1, 2] = float(tokens[6])

        return H

    @staticmethod
    def multi_gmc(stracks, H=np.eye(2, 3)):
        """
        GMC module prediction
        :param stracks: List[Strack]
        """
        if len(stracks) > 0:
            multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
            multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])

            R = H[:2, :2]
            R8x8 = np.kron(np.eye(4, dtype=float), R)
            t = H[:2, 2]

            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                mean = R8x8.dot(mean)
                mean[:2] += t
                cov = R8x8.dot(cov).dot(R8x8.transpose())

                stracks[i].kalman_filter.kf.x = mean
                stracks[i].kalman_filter.kf.P = cov
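multi_gmc applies the estimated 2x3 warp to every track's Kalman state; on a single point the transform is simply p' = R p + t. A minimal sketch:

import numpy as np

H = np.array([[1.0, 0.0, 5.0],    # identity rotation/scale
              [0.0, 1.0, -3.0]])  # camera shift of (+5, -3) pixels

R, t = H[:2, :2], H[:2, 2]
point = np.array([100.0, 200.0])  # e.g. a track's (x, y) position
print(R.dot(point) + t)           # [105. 197.]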
327
yolov7-tracker-example/tracker/trackers/deepsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
DeepSORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False, )
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class DeepSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, h, w)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)


    def reid_preprocess(self, obj_bbox):
        """
        preprocess cropped object bboxes

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, h, w)
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance feature of an object
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
                                                         matching_thresh=0.9,
                                                         cascade_depth=30,
                                                         tracks=tracklet_pool,
                                                         detections=detections
                                                         )

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with iou'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_appearance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by computing the Kalman state distance, constrained by
        the 0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large cost assigned to infeasible associations
        only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_appearance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements, )
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost
        return cost_matrix


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
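gate_cost_matrix depends on chi2inv95 (imported from .matching, not shown in this diff): the 0.95 quantile of the chi-square distribution per degree of freedom, which for the 4-dimensional gate is about 9.4877. A sketch of the two gates on made-up numbers:

import numpy as np

chi2inv95_4dof = 9.4877                        # 0.95 chi-square quantile, 4 dof
gated_cost = 1e5

cost_matrix = np.array([[0.05, 0.10, 0.30]])   # appearance cost, 1 track x 3 dets
gating_distance = np.array([3.2, 15.0, 8.9])   # squared Mahalanobis distances

cost_matrix[cost_matrix > 0.15] = gated_cost                   # appearance gate
cost_matrix[0, gating_distance > chi2inv95_4dof] = gated_cost  # motion gate
print(cost_matrix)                             # [[5.e-02 1.e+05 1.e+05]]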
@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy.linalg

class BaseKalman:

    def __init__(self,
                 state_dim: int = 8,
                 observation_dim: int = 4,
                 F: np.ndarray = np.zeros((0, )),
                 P: np.ndarray = np.zeros((0, )),
                 Q: np.ndarray = np.zeros((0, )),
                 H: np.ndarray = np.zeros((0, )),
                 R: np.ndarray = np.zeros((0, )),
                 ) -> None:

        self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
        if F.shape[0] > 0: self.kf.F = F  # if valid
        if P.shape[0] > 0: self.kf.P = P
        if Q.shape[0] > 0: self.kf.Q = Q
        if H.shape[0] > 0: self.kf.H = H
        if R.shape[0] > 0: self.kf.R = R

    def initialize(self, observation):
        raise NotImplementedError

    def predict(self, ):
        self.kf.predict()

    def update(self, observation, **kwargs):
        self.kf.update(observation, **kwargs)

    def get_state(self, ):
        return self.kf.x

    def gating_distance(self, measurements, only_position=False):
        """Compute gating distance between state distribution and measurements.
        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.
        Parameters
        ----------
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements; note that the format
            (whether xywh, xyah or another) must be identical to the state definition.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.
        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.
        """

        # map state space to measurement space
        mean = self.kf.x.copy()
        mean = np.dot(self.kf.H, mean)
        covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))

        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
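BaseKalman is a thin wrapper over filterpy's KalmanFilter; the subclasses below supply F and H and build Q/R per step. A minimal constant-velocity toy instantiation (assuming filterpy is installed and base_kalman is importable):

import numpy as np

F = np.array([[1., 1.],
              [0., 1.]])          # 1-D state [x, v]
H = np.array([[1., 0.]])          # observe x only

kf = BaseKalman(state_dim=2, observation_dim=1, F=F, H=H)
kf.kf.x = np.array([0., 1.])      # start at x=0 with velocity 1

kf.predict()
print(kf.get_state())             # roughly [1., 1.]
kf.update(np.array([1.2]))        # correct with a measurement
print(kf.get_state())             # x pulled toward 1.2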
@@ -0,0 +1,99 @@
from .base_kalman import BaseKalman
import numpy as np


class BotKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, w, h, vx, vy, vw, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-w-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[2],  # related to w and h
            2 * self._std_weight_position * observation[3],
            2 * self._std_weight_position * observation[2],
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[2],
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[2],
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-w-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[2],
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

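A minimal usage sketch of the filter lifecycle above (detection values are made up for illustration):

import numpy as np

kalman = BotKalman()
kalman.initialize(np.array([640., 360., 80., 160.]))  # first detection, x-y-w-h

kalman.predict()                                      # propagate to the next frame
kalman.update(np.array([644., 362., 82., 158.]))      # correct with the new detection
print(kalman.kf.x[:4])                                # filtered x-y-w-h estimate
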
@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np


class ByteKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z):
        """ update step

        Args:
            z: observation x-y-a-h format

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

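Unlike BotKalman, ByteKalman carries the aspect ratio a = w / h in the state instead of the width, so the width must be recovered after filtering. A small sketch with made-up numbers:

import numpy as np

bkf = ByteKalman()
bkf.initialize(np.array([640., 360., 0.5, 160.]))  # x, y, a = w / h, h

bkf.predict()
bkf.update(np.array([642., 361., 0.5, 162.]))

x, y, a, h = bkf.kf.x[:4]
w = a * h  # recover width from the aspect-ratio state
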
@@ -0,0 +1,144 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy


class OCSORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a format
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # if the predicted area s + vs would be non-positive, zero the area velocity
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def _freeze(self, ):
        """ freeze all the parameters of the Kalman filter

        """
        self.attr_saved = deepcopy(self.kf.__dict__)

    def _unfreeze(self, ):
        """ when a lost object is observed again, replay a virtual trajectory

        """
        if self.attr_saved is not None:
            new_history = deepcopy(self.history_obs)
            self.kf.__dict__ = self.attr_saved

            self.history_obs = self.history_obs[:-1]

            occur = [int(d is None) for d in new_history]
            indices = np.where(np.array(occur) == 0)[0]
            index1 = indices[-2]
            index2 = indices[-1]
            box1 = new_history[index1]
            x1, y1, s1, r1 = box1
            w1 = np.sqrt(s1 * r1)
            h1 = np.sqrt(s1 / r1)
            box2 = new_history[index2]
            x2, y2, s2, r2 = box2
            w2 = np.sqrt(s2 * r2)
            h2 = np.sqrt(s2 / r2)
            time_gap = index2 - index1
            dx = (x2 - x1) / time_gap
            dy = (y2 - y1) / time_gap
            dw = (w2 - w1) / time_gap
            dh = (h2 - h1) / time_gap
            for i in range(index2 - index1):
                """
                The default virtual trajectory generation is by linear
                motion (constant speed hypothesis); you could modify this
                part to implement your own.
                """
                x = x1 + (i + 1) * dx
                y = y1 + (i + 1) * dy
                w = w1 + (i + 1) * dw
                h = h1 + (i + 1) * dh
                s = w * h
                r = w / float(h)
                new_box = np.array([x, y, s, r]).reshape((4, 1))
                """
                The predict-update loop is still used here to refresh the parameters,
                but this can be made faster by directly modifying the internal
                parameters, as suggested in the paper. The naive but slow way is
                kept for easy reading and understanding.
                """
                self.kf.update(new_box)
                if not i == (index2 - index1 - 1):
                    self.kf.predict()

    def update(self, z):
        """ update step

        For simplicity, modify self.kf directly, since OC-SORT changes the intrinsic Kalman filter

        Args:
            z: observation x-y-s-a format
        """

        self.history_obs.append(z)

        if z is None:
            if self.observed:
                self._freeze()
            self.observed = False

            self.kf.update(z)

        else:
            if not self.observed:  # got an observation again; use online smoothing to re-update parameters
                self._unfreeze()

            self.kf.update(z)

            self.observed = True

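A sketch of the freeze/unfreeze behavior above: during occlusion the tracker feeds None, and the first real observation afterwards triggers the virtual-trajectory replay (box values are made up):

import numpy as np

ockf = OCSORTKalman()
ockf.initialize(np.array([100., 100., 400., 0.5]))  # x, y, s = w * h, a = w / h
ockf.update(np.array([100., 100., 400., 0.5]))

for _ in range(3):   # occluded frames: no detection
    ockf.predict()
    ockf.update(None)  # _freeze() fires on the first miss

ockf.predict()
ockf.update(np.array([130., 112., 410., 0.5]))  # re-observed: _unfreeze() replays a linear virtual trajectory
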
@@ -0,0 +1,73 @@
from .base_kalman import BaseKalman
import numpy as np


class SORTKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 7  # [x, y, s, a, vx, vy, vs]  s: area
        observation_dim = 4

        F = np.array([[1, 0, 0, 0, 1, 0, 0],
                      [0, 1, 0, 0, 0, 1, 0],
                      [0, 0, 1, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

        H = np.eye(state_dim // 2 + 1, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        # TODO check
        # give high uncertainty to the unobservable initial velocities
        self.kf.R[2:, 2:] *= 10  # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
        self.kf.P[4:, 4:] *= 1000
        self.kf.P *= 10
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        # keep all observations
        self.history_obs = []
        self.attr_saved = None
        self.observed = False

    def initialize(self, observation):
        """
        Args:
            observation: x-y-s-a format
        """
        self.kf.x = self.kf.x.flatten()
        self.kf.x[:4] = observation

    def predict(self, ):
        """ predict step

        """

        # if the predicted area s + vs would be non-positive, zero the area velocity
        if (self.kf.x[6] + self.kf.x[2] <= 0):
            self.kf.x[6] *= 0.0

        self.kf.predict()

    def update(self, z):
        """ update step

        For simplicity, modify self.kf directly, as in the OC-SORT implementation

        Args:
            z: observation x-y-s-a format
        """

        self.kf.update(z)

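SORT-style filters observe (center x, center y, area, aspect ratio); a hedged conversion from the corner format used elsewhere in the trackers (the helper name is illustrative, not part of the repo's API):

import numpy as np

def tlbr_to_xysr(tlbr):
    # (min x, min y, max x, max y) -> (center x, center y, w * h, w / h)
    x1, y1, x2, y2 = np.asarray(tlbr, dtype=float)
    w, h = x2 - x1, y2 - y1
    return np.array([x1 + w / 2, y1 + h / 2, w * h, w / h])

print(tlbr_to_xysr([100, 50, 180, 210]))  # -> [140. 130. 12800. 0.5]
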
@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np


class NSAKalman(BaseKalman):

    def __init__(self, ):

        state_dim = 8  # [x, y, a, h, vx, vy, va, vh]
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initialize(self, observation):
        """ init x, P, Q, R

        Args:
            observation: x-y-a-h format
        """
        # init x, P, Q, R

        mean_pos = observation
        mean_vel = np.zeros_like(observation)
        self.kf.x = np.r_[mean_pos, mean_vel]  # x_{0, 0}

        std = [
            2 * self._std_weight_position * observation[3],  # related to h
            2 * self._std_weight_position * observation[3],
            1e-2,
            2 * self._std_weight_position * observation[3],
            10 * self._std_weight_velocity * observation[3],
            10 * self._std_weight_velocity * observation[3],
            1e-5,
            10 * self._std_weight_velocity * observation[3],
        ]

        self.kf.P = np.diag(np.square(std))  # P_{0, 0}

    def predict(self, ):
        """ predict step

        x_{n + 1, n} = F * x_{n, n}
        P_{n + 1, n} = F * P_{n, n} * F^T + Q

        """
        std_pos = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-2,
            self._std_weight_position * self.kf.x[3]]
        std_vel = [
            self._std_weight_velocity * self.kf.x[3],
            self._std_weight_velocity * self.kf.x[3],
            1e-5,
            self._std_weight_velocity * self.kf.x[3]]

        Q = np.diag(np.square(np.r_[std_pos, std_vel]))

        self.kf.predict(Q=Q)

    def update(self, z, score):
        """ update step

        Args:
            z: observation x-y-a-h format
            score: the detection score/confidence required by the NSA Kalman filter

        K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
        x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
        P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
        """

        std = [
            self._std_weight_position * self.kf.x[3],
            self._std_weight_position * self.kf.x[3],
            1e-1,
            self._std_weight_position * self.kf.x[3]]

        # NSA: scale the measurement noise by (1 - detection confidence)
        std = [(1. - score) * x for x in std]

        R = np.diag(np.square(std))

        self.kf.update(z=z, R=R)

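The NSA step above scales the measurement standard deviations by (1 - score), so confident detections shrink R and pull the state harder toward the observation. A tiny numeric illustration:

base_std = 8.0                      # nominal position std for a 160-px-tall box (160 / 20)
for score in (0.3, 0.6, 0.9):
    print(score, ((1. - score) * base_std) ** 2)  # R entry shrinks as confidence grows
# 0.3 -> 31.36, 0.6 -> 10.24, 0.9 -> 0.64
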
@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np


class UCMCKalman(BaseKalman):
    def __init__(self, ):

        state_dim = 8
        observation_dim = 4

        F = np.eye(state_dim, state_dim)
        '''
        [1, 0, 0, 0, 1, 0, 0, 0]
        [0, 1, 0, 0, 0, 1, 0, 0]
        ...
        '''
        for i in range(state_dim // 2):
            F[i, i + state_dim // 2] = 1

        H = np.eye(state_dim // 2, state_dim)

        super().__init__(state_dim=state_dim,
                         observation_dim=observation_dim,
                         F=F,
                         H=H)

        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160
388
yolov7-tracker-example/tracker/trackers/matching.py
Normal file
@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time

chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


def merge_matches(m1, m2, shape):
    O, P, Q = shape
    m1 = np.asarray(m1)
    m2 = np.asarray(m2)

    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))

    mask = M1 * M2
    match = mask.nonzero()
    match = list(zip(match[0], match[1]))
    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))

    return match, unmatched_O, unmatched_Q


def _indices_to_matches(cost_matrix, indices, thresh):
    matched_cost = cost_matrix[tuple(zip(*indices))]
    matched_mask = (matched_cost <= thresh)

    matches = indices[matched_mask]
    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))

    return matches, unmatched_a, unmatched_b


def linear_assignment(cost_matrix, thresh):
    if cost_matrix.size == 0:
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
    matches, unmatched_a, unmatched_b = [], [], []
    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    for ix, mx in enumerate(x):
        if mx >= 0:
            matches.append([ix, mx])
    unmatched_a = np.where(x < 0)[0]
    unmatched_b = np.where(y < 0)[0]
    matches = np.asarray(matches)
    return matches, unmatched_a, unmatched_b
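linear_assignment wraps lap.lapjv with a cost limit, so pairs more expensive than thresh stay unmatched; a small self-contained check:

import numpy as np

cost = np.array([[0.2, 0.9],
                 [0.8, 0.3],
                 [0.6, 0.7]])
matches, u_rows, u_cols = linear_assignment(cost, thresh=0.5)
print(matches)  # [[0 0] [1 1]] -- row 2 exceeds the 0.5 limit everywhere
print(u_rows)   # [2]
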
def ious(atlbrs, btlbrs):
    """
    Compute cost based on IoU
    :type atlbrs: list[tlbr] | np.ndarray
    :type btlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray
    """
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if ious.size == 0:
        return ious

    ious = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float64),
        np.ascontiguousarray(btlbrs, dtype=np.float64)
    )

    return ious


def iou_distance(atracks, btracks):
    """
    Compute cost based on IoU
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlbr for track in atracks]
        btlbrs = [track.tlbr for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix


def v_iou_distance(atracks, btracks):
    """
    Compute cost based on IoU of the predicted boxes
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        atlbrs = atracks
        btlbrs = btracks
    else:
        atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
        btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
    _ious = ious(atlbrs, btlbrs)
    cost_matrix = 1 - _ious

    return cost_matrix
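Both distance helpers return 1 - IoU, so identical boxes cost 0; a quick check with raw tlbr arrays (which takes the ndarray path and skips the track attributes):

import numpy as np

a = [np.array([0., 0., 10., 10.])]
b = [np.array([0., 0., 10., 10.]), np.array([20., 20., 30., 30.])]
print(iou_distance(a, b))  # [[0. 1.]] -- perfect overlap, then no overlap
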
def embedding_distance(tracks, detections, metric='cosine'):
    """
    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param metric:
    :return: cost_matrix np.ndarray
    """

    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float64)
    # for i, track in enumerate(tracks):
    #     cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1, -1), det_features, metric))
    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float64)
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
    return cost_matrix


def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
    if cost_matrix.size == 0:
        return cost_matrix
    gating_dim = 2 if only_position else 4
    gating_threshold = chi2inv95[gating_dim]
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position, metric='maha')
        cost_matrix[row, gating_distance > gating_threshold] = np.inf
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
    return cost_matrix
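fuse_motion blends appearance cost with Mahalanobis motion cost as lambda * c_app + (1 - lambda) * d_maha and hard-gates improbable pairs at the chi-square threshold. A sketch of how a tracker would typically chain these calls (the track/detection objects are assumed to carry the attributes used above):

# appearance cost from Re-ID features, then motion fusion and gating
cost = embedding_distance(tracks, detections, metric='cosine')
cost = fuse_motion(kf, cost, tracks, detections, only_position=False, lambda_=0.98)
matches, u_track, u_det = linear_assignment(cost, thresh=0.7)
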
def fuse_iou(cost_matrix, tracks, detections):
    if cost_matrix.size == 0:
        return cost_matrix
    reid_sim = 1 - cost_matrix
    iou_dist = iou_distance(tracks, detections)
    iou_sim = 1 - iou_dist
    fuse_sim = reid_sim * (1 + iou_sim) / 2
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    # fuse_sim = fuse_sim * (1 + det_scores) / 2
    fuse_cost = 1 - fuse_sim
    return fuse_cost


def fuse_score(cost_matrix, detections):
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix
    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    fuse_sim = iou_sim * det_scores
    fuse_cost = 1 - fuse_sim
    return fuse_cost


def greedy_assignment_iou(dist, thresh):
    matched_indices = []
    if dist.shape[1] == 0:
        return np.array(matched_indices, np.int32).reshape(-1, 2)
    for i in range(dist.shape[0]):
        j = dist[i].argmin()
        if dist[i][j] < thresh:
            dist[:, j] = 1.
            matched_indices.append([j, i])
    return np.array(matched_indices, np.int32).reshape(-1, 2)


def greedy_assignment(dists, threshs):
    matches = greedy_assignment_iou(dists.T, threshs)
    u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
    u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
    return matches, u_track, u_det


def fuse_score_matrix(cost_matrix, detections, tracks):
    if cost_matrix.size == 0:
        return cost_matrix
    iou_sim = 1 - cost_matrix

    det_scores = np.array([det.score for det in detections])
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
    trk_scores = np.array([trk.score for trk in tracks])
    trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
    mid_scores = (det_scores + trk_scores) / 2
    fuse_sim = iou_sim * mid_scores
    fuse_cost = 1 - fuse_sim

    return fuse_cost
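fuse_score discounts IoU similarity by detection confidence (fuse_cost = 1 - iou_sim * score); a quick numeric check with a minimal stand-in detection class:

import numpy as np

class _Det:  # minimal stand-in exposing only .score
    def __init__(self, score):
        self.score = score

iou_cost = np.array([[0.2, 0.2]])  # same IoU for both detections
print(fuse_score(iou_cost, [_Det(0.9), _Det(0.4)]))
# [[0.28 0.68]] -- the low-confidence detection becomes much more expensive
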
"""
|
||||
calculate buffered IoU, used in C_BIoU_Tracker
|
||||
"""
|
||||
def buffered_iou_distance(atracks, btracks, level=1):
|
||||
"""
|
||||
atracks: list[C_BIoUSTrack], tracks
|
||||
btracks: list[C_BIoUSTrack], detections
|
||||
level: cascade level, 1 or 2
|
||||
"""
|
||||
assert level in [1, 2], 'level must be 1 or 2'
|
||||
if level == 1: # use motion_state1(tracks) and buffer_bbox1(detections) to calculate
|
||||
atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
|
||||
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
|
||||
else:
|
||||
atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
|
||||
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
|
||||
_ious = ious(atlbrs, btlbrs)
|
||||
|
||||
cost_matrix = 1 - _ious
|
||||
return cost_matrix
|
||||
|
||||
"""
|
||||
observation centric association, with velocity, for OC Sort
|
||||
"""
|
||||
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):
|
||||
|
||||
if(len(tracklets) == 0):
|
||||
return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))
|
||||
|
||||
# get numpy format bboxes
|
||||
trk_tlbrs = np.array([track.tlbr for track in tracklets])
|
||||
det_tlbrs = np.array([det.tlbr for det in detections])
|
||||
det_scores = np.array([det.score for det in detections])
|
||||
|
||||
iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)
|
||||
|
||||
Y, X = speed_direction_batch(det_tlbrs, previous_obs)
|
||||
inertia_Y, inertia_X = velocities[:,0], velocities[:,1]
|
||||
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
|
||||
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
|
||||
diff_angle_cos = inertia_X * X + inertia_Y * Y
|
||||
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
|
||||
diff_angle = np.arccos(diff_angle_cos)
|
||||
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
|
||||
|
||||
valid_mask = np.ones(previous_obs.shape[0])
|
||||
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
|
||||
|
||||
scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
|
||||
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
|
||||
|
||||
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
|
||||
angle_diff_cost = angle_diff_cost * scores.T
|
||||
|
||||
matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9)
|
||||
|
||||
|
||||
return matches, unmatched_a, unmatched_b
|
||||
|
||||
"""
|
||||
helper func of observation_centric_association
|
||||
"""
|
||||
def speed_direction_batch(dets, tracks):
|
||||
tracks = tracks[..., np.newaxis]
|
||||
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0
|
||||
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
|
||||
dx = CX2 - CX1
|
||||
dy = CY2 - CY1
|
||||
norm = np.sqrt(dx**2 + dy**2) + 1e-6
|
||||
dx = dx / norm
|
||||
dy = dy / norm
|
||||
return dy, dx # size: num_track x num_det
|
||||
|
||||
|
||||
def matching_cascade(
|
||||
distance_metric, matching_thresh, cascade_depth, tracks, detections,
|
||||
track_indices=None, detection_indices=None):
|
||||
"""
|
||||
Run matching cascade in DeepSORT
|
||||
|
||||
distance_metirc: function that calculate the cost matrix
|
||||
matching_thresh: float, Associations with cost larger than this value are disregarded.
|
||||
cascade_path: int, equal to max_age of a tracklet
|
||||
tracks: List[STrack], current tracks
|
||||
detections: List[STrack], current detections
|
||||
track_indices: List[int], tracks that will be calculated, Default None
|
||||
detection_indices: List[int], detections that will be calculated, Default None
|
||||
|
||||
return:
|
||||
matched pair, unmatched tracks, unmatced detections: List[int], List[int], List[int]
|
||||
"""
|
||||
if track_indices is None:
|
||||
track_indices = list(range(len(tracks)))
|
||||
if detection_indices is None:
|
||||
detection_indices = list(range(len(detections)))
|
||||
|
||||
detections_to_match = detection_indices
|
||||
matches = []
|
||||
|
||||
for level in range(cascade_depth):
|
||||
"""
|
||||
match new track with detection firstly
|
||||
"""
|
||||
if not len(detections_to_match): # No detections left
|
||||
break
|
||||
|
||||
track_indices_l = [
|
||||
k for k in track_indices
|
||||
if tracks[k].time_since_update == 1 + level
|
||||
] # filter tracks whose age is equal to level + 1 (The age of Newest track = 1)
|
||||
|
||||
if not len(track_indices_l): # Nothing to match at this level
|
||||
continue
|
||||
|
||||
# tracks and detections which will be mathcted in current level
|
||||
track_l = [tracks[idx] for idx in track_indices_l] # List[STrack]
|
||||
det_l = [detections[idx] for idx in detections_to_match] # List[STrack]
|
||||
|
||||
# calculate the cost matrix
|
||||
cost_matrix = distance_metric(track_l, det_l)
|
||||
|
||||
# solve the linear assignment problem
|
||||
matched_row_col, umatched_row, umatched_col = \
|
||||
linear_assignment(cost_matrix, matching_thresh)
|
||||
|
||||
for row, col in matched_row_col: # for those who matched
|
||||
matches.append((track_indices_l[row], detections_to_match[col]))
|
||||
|
||||
umatched_detecion_l = [] # current detections not matched
|
||||
for col in umatched_col: # for detections not matched
|
||||
umatched_detecion_l.append(detections_to_match[col])
|
||||
|
||||
detections_to_match = umatched_detecion_l # update detections to match for next level
|
||||
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
|
||||
|
||||
return matches, unmatched_tracks, detections_to_match
|
||||
|
||||
def nearest_embedding_distance(tracks, detections, metric='cosine'):
|
||||
"""
|
||||
different from embedding distance, this func calculate the
|
||||
nearest distance among all track history features and detections
|
||||
|
||||
tracks: list[STrack]
|
||||
detections: list[STrack]
|
||||
metric: str, cosine or euclidean
|
||||
TODO: support euclidean distance
|
||||
|
||||
return:
|
||||
cost_matrix, np.ndarray, shape(len(tracks), len(detections))
|
||||
"""
|
||||
cost_matrix = np.zeros((len(tracks), len(detections)))
|
||||
det_features = np.asarray([det.features[-1] for det in detections])
|
||||
|
||||
for row, track in enumerate(tracks):
|
||||
track_history_features = np.asarray(track.features)
|
||||
dist = 1. - cal_cosine_distance(track_history_features, det_features)
|
||||
dist = dist.min(axis=0)
|
||||
cost_matrix[row, :] = dist
|
||||
|
||||
return cost_matrix
|
||||
|
||||
def cal_cosine_distance(mat1, mat2):
|
||||
"""
|
||||
simple func to calculate cosine distance between 2 matrixs
|
||||
|
||||
:param mat1: np.ndarray, shape(M, dim)
|
||||
:param mat2: np.ndarray, shape(N, dim)
|
||||
:return: np.ndarray, shape(M, N)
|
||||
"""
|
||||
# result = mat1·mat2^T / |mat1|·|mat2|
|
||||
# norm mat1 and mat2
|
||||
mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
|
||||
mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)
|
||||
|
||||
return np.dot(mat1, mat2.T)
|
||||
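Note that despite its name, cal_cosine_distance returns cosine similarity, which is why callers take 1 - ...; a quick check with unit-length rows:

import numpy as np

a = np.array([[1., 0.], [0., 1.]])
b = np.array([[1., 0.]])
print(cal_cosine_distance(a, b))       # [[1.] [0.]] -- similarity, not distance
print(1. - cal_cosine_distance(a, b))  # [[0.] [1.]] -- what the matchers actually use
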
237
yolov7-tracker-example/tracker/trackers/ocsort_tracker.py
Normal file
@@ -0,0 +1,237 @@
"""
OC-SORT
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_velocity
from .matching import *

from cython_bbox import bbox_overlaps as bbox_ious

class OCSortTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.delta_t = 3

    @staticmethod
    def k_previous_obs(observations, cur_age, k):
        if len(observations) == 0:
            return [-1, -1, -1, -1, -1]
        for i in range(k):
            dt = k - i
            if cur_age - dt in observations:
                return observations[cur_age - dt]
        max_age = max(observations.keys())
        return observations[max_age]

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to the original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, Observation-Centric Momentum'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        velocities = np.array(
            [trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])

        # last observation, observation-centric
        # last_boxes = np.array([trk.last_observation for trk in tracklet_pool])

        # historical observations
        k_observations = np.array(
            [self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])

        # Predict the current location with the Kalman filter
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Observation-centric cost matrix and assignment
        matches, u_track, u_detection = observation_centric_association(
            tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
            velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
        )

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        ''' Step 3: Second association, with low score detection boxes'''
        # associate the unmatched tracks with the low score detections
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []
        r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]

        # for tracks unmatched in the first round, use the last observation
        r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detections_second_bbox = [det.tlbr for det in detections_second]

        dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)

        matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_tracklets[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = r_tracked_tracklets[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
        # get scores of lost tracks
        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
    resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
    return resa, resb
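A hedged sketch of driving the tracker above frame by frame; the argument names mirror the attributes __init__ reads (conf_thresh, track_buffer, kalman_format), but the detector and the exact kalman_format value are assumptions:

from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(conf_thresh=0.5, track_buffer=30, kalman_format='ocsort')
tracker = OCSortTracker(args, frame_rate=30)

for frame in video_frames:              # hypothetical frame source
    dets = detector(frame)              # hypothetical: (N, 6) array, tlbr + score + class
    tracks = tracker.update(dets, frame, frame)
    for t in tracks:
        print(t.track_id, t.tlbr, t.score)
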
@@ -0,0 +1,98 @@
"""
AFLink code in StrongSORT (StrongSORT: Make DeepSORT Great Again, arXiv)

copied from the original repo
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms


class TemporalBlock(nn.Module):
    def __init__(self, cin, cout):
        super(TemporalBlock, self).__init__()
        self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.bnf = nn.BatchNorm1d(cout)
        self.bnx = nn.BatchNorm1d(cout)
        self.bny = nn.BatchNorm1d(cout)

    def bn(self, x):
        x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
        x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
        x[:, :, :, 2] = self.bny(x[:, :, :, 2])
        return x

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class FusionBlock(nn.Module):
    def __init__(self, cin, cout):
        super(FusionBlock, self).__init__()
        self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
        self.bn = nn.BatchNorm2d(cout)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Classifier(nn.Module):
    def __init__(self, cin):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(cin * 2, cin // 2)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(cin // 2, 2)

    def forward(self, x1, x2):
        x = torch.cat((x1, x2), dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


class PostLinker(nn.Module):
    def __init__(self):
        super(PostLinker, self).__init__()
        self.TemporalModule_1 = nn.Sequential(
            TemporalBlock(1, 32),
            TemporalBlock(32, 64),
            TemporalBlock(64, 128),
            TemporalBlock(128, 256)
        )
        self.TemporalModule_2 = nn.Sequential(
            TemporalBlock(1, 32),
            TemporalBlock(32, 64),
            TemporalBlock(64, 128),
            TemporalBlock(128, 256)
        )
        self.FusionBlock_1 = FusionBlock(256, 256)
        self.FusionBlock_2 = FusionBlock(256, 256)
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = Classifier(256)

    def forward(self, x1, x2):
        x1 = x1[:, :, :, :3]
        x2 = x2[:, :, :, :3]
        x1 = self.TemporalModule_1(x1)  # [B,1,30,3] -> [B,256,6,3]
        x2 = self.TemporalModule_2(x2)
        x1 = self.FusionBlock_1(x1)
        x2 = self.FusionBlock_2(x2)
        x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
        x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
        y = self.classifier(x1, x2)
        if not self.training:
            y = torch.softmax(y, dim=1)
        return y
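A shape sketch of the association head above: each input is a [B, 1, 30, 3] window of (frame, x, y) triples for one tracklet end, and the output is a 2-way link/no-link score. The model here is untrained and used only to show the tensor flow:

import torch

model = PostLinker().eval()
t1 = torch.randn(2, 1, 30, 3)  # trajectory tail of tracklet A: 30 (frame, x, y) rows
t2 = torch.randn(2, 1, 30, 3)  # trajectory head of tracklet B
with torch.no_grad():
    prob = model(t1, t2)       # softmax over {no-link, link} in eval mode
print(prob.shape)              # torch.Size([2, 2])
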
598
yolov7-tracker-example/tracker/trackers/reid_models/OSNet.py
Normal file
@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F

__all__ = [
    'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]

pretrained_urls = {
    'osnet_x1_0':
    'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
    'osnet_x0_75':
    'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
    'osnet_x0_5':
    'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
    'osnet_x0_25':
    'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
    'osnet_ibn_x1_0':
    'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}


##########
# Basic layers
##########
class ConvLayer(nn.Module):
    """Convolution layer (conv + bn + relu)."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        groups=1,
        IN=False
    ):
        super(ConvLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            groups=groups
        )
        if IN:
            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
        else:
            self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Conv1x1(nn.Module):
    """1x1 convolution + bn + relu."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super(Conv1x1, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=stride,
            padding=0,
            bias=False,
            groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Conv1x1Linear(nn.Module):
    """1x1 convolution + bn (w/o non-linearity)."""

    def __init__(self, in_channels, out_channels, stride=1):
        super(Conv1x1Linear, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Conv3x3(nn.Module):
    """3x3 convolution + bn + relu."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super(Conv3x3, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            3,
            stride=stride,
            padding=1,
            bias=False,
            groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class LightConv3x3(nn.Module):
    """Lightweight 3x3 convolution.

    1x1 (linear) + dw 3x3 (nonlinear).
    """

    def __init__(self, in_channels, out_channels):
        super(LightConv3x3, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, 1, stride=1, padding=0, bias=False
        )
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            bias=False,
            groups=out_channels
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
    """A mini-network that generates channel-wise gates conditioned on the input tensor."""

    def __init__(
        self,
        in_channels,
        num_gates=None,
        return_gates=False,
        gate_activation='sigmoid',
        reduction=16,
        layer_norm=False
    ):
        super(ChannelGate, self).__init__()
        if num_gates is None:
            num_gates = in_channels
        self.return_gates = return_gates
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(
            in_channels,
            in_channels // reduction,
            kernel_size=1,
            bias=True,
            padding=0
        )
        self.norm1 = None
        if layer_norm:
            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(
            in_channels // reduction,
            num_gates,
            kernel_size=1,
            bias=True,
            padding=0
        )
        if gate_activation == 'sigmoid':
            self.gate_activation = nn.Sigmoid()
        elif gate_activation == 'relu':
            self.gate_activation = nn.ReLU(inplace=True)
        elif gate_activation == 'linear':
            self.gate_activation = None
        else:
            raise RuntimeError(
                "Unknown gate activation: {}".format(gate_activation)
            )

    def forward(self, x):
        input = x
        x = self.global_avgpool(x)
        x = self.fc1(x)
        if self.norm1 is not None:
            x = self.norm1(x)
        x = self.relu(x)
        x = self.fc2(x)
        if self.gate_activation is not None:
            x = self.gate_activation(x)
        if self.return_gates:
            return x
        return input * x


class OSBlock(nn.Module):
    """Omni-scale feature learning block."""

    def __init__(
        self,
        in_channels,
        out_channels,
        IN=False,
        bottleneck_reduction=4,
        **kwargs
    ):
        super(OSBlock, self).__init__()
        mid_channels = out_channels // bottleneck_reduction
        self.conv1 = Conv1x1(in_channels, mid_channels)
        self.conv2a = LightConv3x3(mid_channels, mid_channels)
        self.conv2b = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.conv2c = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.conv2d = nn.Sequential(
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
            LightConv3x3(mid_channels, mid_channels),
        )
        self.gate = ChannelGate(mid_channels)
        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
        self.downsample = None
        if in_channels != out_channels:
            self.downsample = Conv1x1Linear(in_channels, out_channels)
        self.IN = None
        if IN:
            self.IN = nn.InstanceNorm2d(out_channels, affine=True)

    def forward(self, x):
        identity = x
        x1 = self.conv1(x)
        x2a = self.conv2a(x1)
        x2b = self.conv2b(x1)
        x2c = self.conv2c(x1)
        x2d = self.conv2d(x1)
        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
        x3 = self.conv3(x2)
        if self.downsample is not None:
            identity = self.downsample(identity)
        out = x3 + identity
        if self.IN is not None:
            out = self.IN(out)
        return F.relu(out)


##########
# Network architecture
##########
class OSNet(nn.Module):
    """Omni-Scale Network.

    Reference:
        - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
        - Zhou et al. Learning Generalisable Omni-Scale Representations
          for Person Re-Identification. TPAMI, 2021.
    """

    def __init__(
        self,
        num_classes,
        blocks,
        layers,
        channels,
        feature_dim=512,
        loss='softmax',
        IN=False,
        **kwargs
    ):
        super(OSNet, self).__init__()
        num_blocks = len(blocks)
        assert num_blocks == len(layers)
        assert num_blocks == len(channels) - 1
        self.loss = loss
        self.feature_dim = feature_dim

        # convolutional backbone
        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv2 = self._make_layer(
            blocks[0],
            layers[0],
            channels[0],
            channels[1],
            reduce_spatial_size=True,
            IN=IN
        )
        self.conv3 = self._make_layer(
            blocks[1],
            layers[1],
            channels[1],
            channels[2],
            reduce_spatial_size=True
        )
        self.conv4 = self._make_layer(
            blocks[2],
            layers[2],
            channels[2],
            channels[3],
            reduce_spatial_size=False
        )
        self.conv5 = Conv1x1(channels[3], channels[3])
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        # fully connected layer
        self.fc = self._construct_fc_layer(
            self.feature_dim, channels[3], dropout_p=None
        )
        # identity classification layer
        self.classifier = nn.Linear(self.feature_dim, num_classes)

        self._init_params()

    def _make_layer(
        self,
        block,
        layer,
        in_channels,
        out_channels,
        reduce_spatial_size,
        IN=False
    ):
        layers = []

        layers.append(block(in_channels, out_channels, IN=IN))
        for i in range(1, layer):
            layers.append(block(out_channels, out_channels, IN=IN))

        if reduce_spatial_size:
            layers.append(
                nn.Sequential(
                    Conv1x1(out_channels, out_channels),
                    nn.AvgPool2d(2, stride=2)
                )
            )

        return nn.Sequential(*layers)

    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
        if fc_dims is None or fc_dims < 0:
            self.feature_dim = input_dim
            return None

        if isinstance(fc_dims, int):
            fc_dims = [fc_dims]

        layers = []
        for dim in fc_dims:
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU(inplace=True))
            if dropout_p is not None:
                layers.append(nn.Dropout(p=dropout_p))
            input_dim = dim

        self.feature_dim = fc_dims[-1]

        return nn.Sequential(*layers)

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def featuremaps(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        return x

    def forward(self, x, return_featuremaps=False):
        x = self.featuremaps(x)
        if return_featuremaps:
            return x
        v = self.global_avgpool(x)
        v = v.view(v.size(0), -1)
        if self.fc is not None:
            v = self.fc(v)
        if not self.training:
            return v
        y = self.classifier(v)
        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


def init_pretrained_weights(model, key=''):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown
    from collections import OrderedDict

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(
                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
                )
            )
        )
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise
    filename = key + '_imagenet.pth'
    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        gdown.download(pretrained_urls[key], cached_file, quiet=False)

    state_dict = torch.load(cached_file)
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard the 'module.' prefix

        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights from "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(cached_file)
        )
    else:
        print(
            'Successfully loaded imagenet pretrained weights from "{}"'.
            format(cached_file)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )


##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # standard size (width x1.0)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[64, 256, 384, 512],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x1_0')
    return model


def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # medium size (width x0.75)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[48, 192, 288, 384],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_75')
    return model


def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # tiny size (width x0.5)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[32, 128, 192, 256],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_5')
    return model


def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    # very tiny size (width x0.25)
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[16, 64, 96, 128],
        loss=loss,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_x0_25')
    return model


def osnet_ibn_x1_0(
    num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
    # standard size (width x1.0) + IBN layer
    # Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
    model = OSNet(
        num_classes,
        blocks=[OSBlock, OSBlock, OSBlock],
        layers=[2, 2, 2],
        channels=[64, 256, 384, 512],
        loss=loss,
        IN=True,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, key='osnet_ibn_x1_0')
    return model
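A hedged sketch of extracting Re-ID embeddings with the factory functions above (random tensors stand in for person crops):

import torch

model = osnet_x0_25(num_classes=1000, pretrained=False, loss='softmax')
model.eval()                          # eval mode makes forward() return features, not logits

crops = torch.randn(8, 3, 256, 128)   # batch of person crops, H x W = 256 x 128
with torch.no_grad():
    feats = model(crops)              # (8, 512) embeddings from the fc head
feats = feats / feats.norm(dim=1, keepdim=True)  # L2-normalize for cosine matching
print(feats.shape)
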
@@ -0,0 +1,3 @@
"""
file for the reid_models folder
"""
@@ -0,0 +1,157 @@
"""
file for the DeepSORT Re-ID model
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms


class BasicBlock(nn.Module):
    def __init__(self, c_in, c_out, is_downsample=False):
        super(BasicBlock, self).__init__()
        self.is_downsample = is_downsample
        if is_downsample:
            self.conv1 = nn.Conv2d(
                c_in, c_out, 3, stride=2, padding=1, bias=False)
        else:
            self.conv1 = nn.Conv2d(
                c_in, c_out, 3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_out)
        self.relu = nn.ReLU(True)
        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(c_out)
        if is_downsample:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
                nn.BatchNorm2d(c_out)
            )
        elif c_in != c_out:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
                nn.BatchNorm2d(c_out)
            )
            self.is_downsample = True

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)
        if self.is_downsample:
            x = self.downsample(x)
        return F.relu(x.add(y), True)


def make_layers(c_in, c_out, repeat_times, is_downsample=False):
    blocks = []
    for i in range(repeat_times):
        if i == 0:
            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
        else:
            blocks += [BasicBlock(c_out, c_out), ]
    return nn.Sequential(*blocks)


class Net(nn.Module):
    def __init__(self, num_classes=751, reid=False):
        super(Net, self).__init__()
        # input: 3 x 128 x 64 (C x H x W)
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # nn.Conv2d(32,32,3,stride=1,padding=1),
            # nn.BatchNorm2d(32),
            # nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, padding=1),
        )
        # 64 x 64 x 32
        self.layer1 = make_layers(64, 64, 2, False)
        # 64 x 64 x 32
        self.layer2 = make_layers(64, 128, 2, True)
        # 128 x 32 x 16
        self.layer3 = make_layers(128, 256, 2, True)
        # 256 x 16 x 8
        self.layer4 = make_layers(256, 512, 2, True)
        # 512 x 8 x 4
        self.avgpool = nn.AvgPool2d((8, 4), 1)
        # 512 x 1 x 1
        self.reid = reid
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # B x 512
        if self.reid:
            x = x.div(x.norm(p=2, dim=1, keepdim=True))  # L2-normalize the embedding
            return x
        # classifier
        x = self.classifier(x)
        return x


class Extractor(object):
    def __init__(self, model_path, use_cuda=True):
        self.net = Net(reid=True)
        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        state_dict = torch.load(model_path, map_location=torch.device(self.device))[
            'net_dict']
        self.net.load_state_dict(state_dict)
        logger = logging.getLogger("root.tracker")
        logger.info("Loading weights from {}... Done!".format(model_path))
        self.net.to(self.device)
        self.size = (64, 128)
        self.norm = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def _preprocess(self, im_crops):
        """
        1. convert to float with scale from 0 to 1
        2. resize to (64, 128) as the Market1501 dataset did
        3. concatenate to a numpy array
        4. convert to torch Tensor
        5. normalize
        """
        def _resize(im, size):
            try:
                return cv2.resize(im.astype(np.float32) / 255., size)
            except cv2.error:
                print('Error: a bbox side length is zero, ', im.shape)
                exit(1)

        im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
            0) for im in im_crops], dim=0).float()
        return im_batch

    def __call__(self, im_crops):
        if isinstance(im_crops, list):
            im_batch = self._preprocess(im_crops)
        else:
            im_batch = im_crops

        with torch.no_grad():
            im_batch = im_batch.to(self.device)
            features = self.net(im_batch)
        return features
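

# --- Usage sketch (editor's note): a minimal, weight-free exercise of the
# backbone above; Extractor itself additionally needs a checkpoint whose
# 'net_dict' entry matches Net (the commented path below is hypothetical).
if __name__ == '__main__':
    net = Net(num_classes=751, reid=True)
    net.eval()
    with torch.no_grad():
        dummy = torch.randn(4, 3, 128, 64)  # (B, C, H, W) crops, as Extractor produces
        emb = net(dummy)
    print(emb.shape)  # torch.Size([4, 512]), rows L2-normalized
    # extractor = Extractor('deepsort_ckpt.t7', use_cuda=False)  # hypothetical checkpoint path
    # feats = extractor([np.zeros((80, 40, 3), dtype=np.uint8)])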
@@ -0,0 +1,273 @@
"""
load checkpoint file
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
"""
from __future__ import division, print_function, absolute_import
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn


__all__ = [
    'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
    'open_all_layers', 'open_specified_layers', 'count_num_param',
    'load_pretrained_weights'
]


def load_checkpoint(fpath):
    r"""Loads checkpoint.

    ``UnicodeDecodeError`` is handled explicitly, so files saved with
    Python 2 can be read from Python 3.

    Args:
        fpath (str): path to checkpoint.

    Returns:
        dict

    Examples::
        >>> from torchreid.utils import load_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> checkpoint = load_checkpoint(fpath)
    """
    if fpath is None:
        raise ValueError('File path is None')
    fpath = osp.abspath(osp.expanduser(fpath))
    if not osp.exists(fpath):
        raise FileNotFoundError('File is not found at "{}"'.format(fpath))
    map_location = None if torch.cuda.is_available() else 'cpu'
    try:
        checkpoint = torch.load(fpath, map_location=map_location)
    except UnicodeDecodeError:
        pickle.load = partial(pickle.load, encoding="latin1")
        pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
        checkpoint = torch.load(
            fpath, pickle_module=pickle, map_location=map_location
        )
    except Exception:
        print('Unable to load checkpoint from "{}"'.format(fpath))
        raise
    return checkpoint


def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
    r"""Resumes training from a checkpoint.

    This will load (1) model weights and (2) the ``state_dict``
    of the optimizer if ``optimizer`` is not None.

    Args:
        fpath (str): path to checkpoint.
        model (nn.Module): model.
        optimizer (Optimizer, optional): an Optimizer.
        scheduler (LRScheduler, optional): an LRScheduler.

    Returns:
        int: start_epoch.

    Examples::
        >>> from torchreid.utils import resume_from_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> start_epoch = resume_from_checkpoint(
        >>>     fpath, model, optimizer, scheduler
        >>> )
    """
    print('Loading checkpoint from "{}"'.format(fpath))
    checkpoint = load_checkpoint(fpath)
    model.load_state_dict(checkpoint['state_dict'])
    print('Loaded model weights')
    if optimizer is not None and 'optimizer' in checkpoint.keys():
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Loaded optimizer')
    if scheduler is not None and 'scheduler' in checkpoint.keys():
        scheduler.load_state_dict(checkpoint['scheduler'])
        print('Loaded scheduler')
    start_epoch = checkpoint['epoch']
    print('Last epoch = {}'.format(start_epoch))
    if 'rank1' in checkpoint.keys():
        print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
    return start_epoch


def adjust_learning_rate(
    optimizer,
    base_lr,
    epoch,
    stepsize=20,
    gamma=0.1,
    linear_decay=False,
    final_lr=0,
    max_epoch=100
):
    r"""Adjusts learning rate.

    Deprecated.
    """
    if linear_decay:
        # linearly decay learning rate from base_lr to final_lr
        frac_done = epoch / max_epoch
        lr = frac_done * final_lr + (1. - frac_done) * base_lr
    else:
        # decay learning rate by gamma for every stepsize epochs
        lr = base_lr * (gamma ** (epoch // stepsize))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

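
# --- Worked example (editor's note): the step-decay branch above yields, for
# base_lr=0.1, gamma=0.1, stepsize=20:
#   epoch  0..19 -> lr = 0.1 * 0.1**0 = 0.1
#   epoch 20..39 -> lr = 0.1 * 0.1**1 = 0.01
#   epoch 40..59 -> lr = 0.1 * 0.1**2 = 0.001
# while the linear branch interpolates from base_lr at epoch 0 to final_lr at
# max_epoch.
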
def set_bn_to_eval(m):
    r"""Sets BatchNorm layers to eval mode."""
    # 1. no update for running mean and var
    # 2. scale and shift parameters are still trainable
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.eval()


def open_all_layers(model):
    r"""Opens all layers in model for training.

    Examples::
        >>> from torchreid.utils import open_all_layers
        >>> open_all_layers(model)
    """
    model.train()
    for p in model.parameters():
        p.requires_grad = True


def open_specified_layers(model, open_layers):
    r"""Opens the specified layers in a model for training while keeping
    all other layers frozen.

    Args:
        model (nn.Module): neural net model.
        open_layers (str or list): layers open for training.

    Examples::
        >>> from torchreid.utils import open_specified_layers
        >>> # Only model.classifier will be updated.
        >>> open_layers = 'classifier'
        >>> open_specified_layers(model, open_layers)
        >>> # Only model.fc and model.classifier will be updated.
        >>> open_layers = ['fc', 'classifier']
        >>> open_specified_layers(model, open_layers)
    """
    if isinstance(model, nn.DataParallel):
        model = model.module

    if isinstance(open_layers, str):
        open_layers = [open_layers]

    for layer in open_layers:
        assert hasattr(
            model, layer
        ), '"{}" is not an attribute of the model, please provide the correct name'.format(
            layer
        )

    for name, module in model.named_children():
        if name in open_layers:
            module.train()
            for p in module.parameters():
                p.requires_grad = True
        else:
            module.eval()
            for p in module.parameters():
                p.requires_grad = False


def count_num_param(model):
    r"""Counts the number of parameters in a model while ignoring ``self.classifier``.

    Args:
        model (nn.Module): network model.

    Examples::
        >>> from torchreid.utils import count_num_param
        >>> model_size = count_num_param(model)

    .. warning::

        This method is deprecated in favor of
        ``torchreid.utils.compute_model_complexity``.
    """
    warnings.warn(
        'This method is deprecated and will be removed in the future.'
    )

    num_param = sum(p.numel() for p in model.parameters())

    if isinstance(model, nn.DataParallel):
        model = model.module

    if hasattr(model,
               'classifier') and isinstance(model.classifier, nn.Module):
        # we ignore the classifier because it is unused at test time
        num_param -= sum(p.numel() for p in model.classifier.parameters())

    return num_param


def load_pretrained_weights(model, weight_path):
    r"""Loads pretrained weights to a model.

    Features::
        - Incompatible layers (unmatched in name or size) will be ignored.
        - Automatically deals with keys containing "module.".

    Args:
        model (nn.Module): network model.
        weight_path (str): path to pretrained weights.

    Examples::
        >>> from torchreid.utils import load_pretrained_weights
        >>> weight_path = 'log/my_model/model-best.pth.tar'
        >>> load_pretrained_weights(model, weight_path)
    """
    checkpoint = load_checkpoint(weight_path)
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard the 'module.' prefix added by DataParallel

        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(weight_path)
        )
    else:
        print(
            'Successfully loaded pretrained weights from "{}"'.
            format(weight_path)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )
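

# --- Usage sketch (editor's note): a small self-contained demo of the partial
# loading above; the toy modules and the temporary file are illustrative only.
if __name__ == '__main__':
    import os
    import tempfile

    src = nn.Linear(4, 2)  # "pretrained" toy model
    tmp = tempfile.NamedTemporaryFile(suffix='.pth', delete=False)
    tmp.close()
    torch.save({'state_dict': src.state_dict()}, tmp.name)
    load_pretrained_weights(nn.Linear(4, 2), tmp.name)  # all keys match -> success message
    load_pretrained_weights(nn.Linear(8, 2), tmp.name)  # 'weight' size differs -> discarded
    os.remove(tmp.name)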
169
yolov7-tracker-example/tracker/trackers/sort_tracker.py
Normal file
169
yolov7-tracker-example/tracker/trackers/sort_tracker.py
Normal file
@@ -0,0 +1,169 @@
"""
SORT
"""

import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *


class SortTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format
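
    # --- Editor's note: `args` is expected to be a namespace carrying at least
    # the fields read above: args.conf_thresh (detection confidence threshold),
    # args.track_buffer (frames to keep lost tracks alive), and
    # args.kalman_format (a key of MOTION_MODEL_DICT in tracklet.py,
    # e.g. 'sort' or 'byte').
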
    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        dists = iou_distance(tracklet_pool, detections)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 3: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 4: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
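

# --- Usage sketch (editor's note): a minimal per-frame loop. The args fields
# and the row layout of the detection array (4 box values + score + class,
# indexed as update() does) are assumptions; note the docstring says tlbr while
# Tracklet consumes tlwh, so verify the format against your detector. Assumes
# the sibling modules (tracklet, matching, kalman_filters) are importable.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(conf_thresh=0.5, track_buffer=30, kalman_format='byte')
    tracker = SortTracker(args, frame_rate=30)
    for frame in range(3):
        dets = np.array([[100. + 2 * frame, 100., 40., 80., 0.9, 0.]])
        online = tracker.update(dets, img=None, ori_img=None)
        print(frame, [t.track_id for t in online])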
338
yolov7-tracker-example/tracker/trackers/sparse_tracker.py
Normal file
338
yolov7-tracker-example/tracker/trackers/sparse_tracker.py
Normal file
@@ -0,0 +1,338 @@
"""
SparseTrack
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_depth
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

from .camera_motion_compensation import GMC

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False)
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class SparseTracker(object):
    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        # camera motion compensation module
        self.gmc = GMC(method='orb', downscale=2, verbose=None)

    def get_deep_range(self, obj, step):
        # collect the pseudo-depth of every tracklet/detection, split the
        # [min, max] depth interval into `step` bins and return one boolean
        # mask per bin
        col = []
        for t in obj:
            lend = (t.deep_vec)[2]
            col.append(lend)
        col = np.asarray(col)  # as an array so the elementwise masks below work
        max_len, min_len = max(col), min(col)
        if max_len != min_len:
            deep_range = np.arange(min_len, max_len, (max_len - min_len + 1) / step)
            if deep_range[-1] < max_len:
                deep_range = np.concatenate([deep_range, np.array([max_len])])
            deep_range[0] = np.floor(deep_range[0])
            deep_range[-1] = np.ceil(deep_range[-1])
        else:
            deep_range = [min_len, ]
        mask = self.get_sub_mask(deep_range, col)
        return mask

    def get_sub_mask(self, deep_range, col):
        min_len = deep_range[0]
        max_len = deep_range[-1]
        if max_len == min_len:
            lc = min_len
        mask = []
        for d in deep_range:
            if d > deep_range[0] and d < deep_range[-1]:
                mask.append((col >= lc) & (col < d))
                lc = d
            elif d == deep_range[-1]:
                mask.append((col >= lc) & (col <= d))
                lc = d
            else:
                lc = d
                continue
        return mask
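
    # --- Worked example (editor's note): with depths col = [5, 12, 30] and
    # step = 3, get_deep_range builds deep_range = [5.0, 13.67, 22.33, 30.0]
    # (floor/ceil applied at the two ends) and get_sub_mask yields the bins
    #   [T, T, F]  for depth in [5.0, 13.67)
    #   [F, F, F]  for depth in [13.67, 22.33)
    #   [F, F, T]  for depth in [22.33, 30.0]
    # so DCM below associates detections and tracks bin by bin, near to far.
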
    # core function
    def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
        # depth cascade matching: associate detections and tracks level by
        # level, from the nearest depth bin to the farthest; `is_fuse` is
        # kept for interface compatibility but is unused here
        if len(detections) > 0:
            det_mask = self.get_deep_range(detections, levels)
        else:
            det_mask = []

        if len(tracks) != 0:
            track_mask = self.get_deep_range(tracks, levels)
        else:
            track_mask = []

        u_detection, u_tracks, res_det, res_track = [], [], [], []
        if len(track_mask) != 0:
            if len(track_mask) < len(det_mask):
                for i in range(len(det_mask) - len(track_mask)):
                    idx = np.argwhere(det_mask[len(track_mask) + i] == True)
                    for idd in idx:
                        res_det.append(detections[idd[0]])
            elif len(track_mask) > len(det_mask):
                for i in range(len(track_mask) - len(det_mask)):
                    idx = np.argwhere(track_mask[len(det_mask) + i] == True)
                    for idd in idx:
                        res_track.append(tracks[idd[0]])

            for dm, tm in zip(det_mask, track_mask):
                det_idx = np.argwhere(dm == True)
                trk_idx = np.argwhere(tm == True)

                # search det
                det_ = []
                for idd in det_idx:
                    det_.append(detections[idd[0]])
                det_ = det_ + u_detection
                # search trk
                track_ = []
                for idt in trk_idx:
                    track_.append(tracks[idt[0]])
                # update trk
                track_ = track_ + u_tracks

                dists = iou_distance(track_, det_)

                matches, u_track_, u_det_ = linear_assignment(dists, thresh)
                for itracked, idet in matches:
                    track = track_[itracked]
                    det = det_[idet]
                    if track.state == TrackState.Tracked:
                        track.update(det_[idet], self.frame_id)
                        activated_tracklets.append(track)
                    else:
                        track.re_activate(det, self.frame_id, new_id=False)
                        refind_tracklets.append(track)
                u_tracks = [track_[t] for t in u_track_]
                u_detection = [det_[t] for t in u_det_]

            u_tracks = u_tracks + res_track
            u_detection = u_detection + res_det

        else:
            u_detection = detections

        return activated_tracklets, refind_tracklets, u_tracks, u_detection

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlwh format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.args.conf_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]

        cates = categories[remain_inds]
        cates_second = categories[inds_second]

        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with high score detection boxes, depth cascade matching'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # Camera motion compensation
        warp = self.gmc.apply(ori_img, dets)
        self.gmc.multi_gmc(tracklet_pool, warp)
        self.gmc.multi_gmc(unconfirmed, warp)

        # depth cascade matching
        activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
            detections,
            tracklet_pool,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.75,
            is_fuse=True)

        ''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
        if len(dets_second) > 0:
            '''Detections'''
            detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
                                 (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
        else:
            detections_second = []

        r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]

        activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
            detections_second,
            r_tracked_tracklets,
            activated_tracklets,
            refind_tracklets,
            levels=3,
            thresh=0.3,
            is_fuse=False)

        for track in u_track:
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = u_detection_high
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets


def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
327
yolov7-tracker-example/tracker/trackers/strongsort_tracker.py
Normal file
327
yolov7-tracker-example/tracker/trackers/strongsort_tracker.py
Normal file
@@ -0,0 +1,327 @@
"""
StrongSORT
"""

import numpy as np
import torch
from torchvision.ops import nms

import cv2
import torchvision.transforms as T

from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *

from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor

REID_MODEL_DICT = {
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'deepsort': Extractor
}


def load_reid_model(reid_model, reid_model_path):

    if 'osnet' in reid_model:
        func = REID_MODEL_DICT[reid_model]
        model = func(num_classes=1, pretrained=False)
        load_pretrained_weights(model, reid_model_path)
        model.cuda().eval()

    elif 'deepsort' in reid_model:
        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)

    else:
        raise NotImplementedError

    return model


class StrongSortTracker(object):

    def __init__(self, args, frame_rate=30):
        self.tracked_tracklets = []  # type: list[Tracklet]
        self.lost_tracklets = []  # type: list[Tracklet]
        self.removed_tracklets = []  # type: list[Tracklet]

        self.frame_id = 0
        self.args = args

        self.det_thresh = args.conf_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
        self.max_time_lost = self.buffer_size

        self.motion = args.kalman_format

        self.with_reid = not args.discard_reid

        self.reid_model, self.crop_transforms = None, None
        if self.with_reid:
            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
            self.crop_transforms = T.Compose([
                # T.ToPILImage(),
                # T.Resize(size=(256, 128)),
                T.ToTensor(),  # (c, 128, 256)
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

            self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)

        self.lambda_ = 0.98  # the coefficient of the cost mix in eq. 10 of the paper

    def reid_preprocess(self, obj_bbox):
        """
        preprocess a cropped object bbox

        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)

        return:
            torch.Tensor of shape (c, 128, 256)
        """

        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size)  # shape: (h, w, c)

        return self.crop_transforms(obj_bbox)

    def get_feature(self, tlwhs, ori_img):
        """
        get the appearance features of the objects
        tlwhs: shape (num_of_objects, 4)
        ori_img: original image, np.ndarray, shape(H, W, C)
        """
        obj_bbox = []

        for tlwh in tlwhs:
            tlwh = list(map(int, tlwh))

            # limit to the legal range
            tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)

            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])

            obj_bbox.append(tlbr_tensor)

        if not obj_bbox:
            return np.array([])

        obj_bbox = torch.stack(obj_bbox, dim=0)
        obj_bbox = obj_bbox.cuda()

        features = self.reid_model(obj_bbox)  # shape: (num_of_objects, feature_dim)
        return features.cpu().detach().numpy()

    def update(self, output_results, img, ori_img):
        """
        output_results: processed detections (scaled to original size), tlbr format
        """

        self.frame_id += 1
        activated_tracklets = []
        refind_tracklets = []
        lost_tracklets = []
        removed_tracklets = []

        scores = output_results[:, 4]
        bboxes = output_results[:, :4]
        categories = output_results[:, -1]

        remain_inds = scores > self.args.conf_thresh

        dets = bboxes[remain_inds]

        cates = categories[remain_inds]

        scores_keep = scores[remain_inds]

        features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)

        if len(dets) > 0:
            '''Detections'''
            detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
                          (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
        else:
            detections = []

        ''' Step 1: Add newly detected tracklets to tracked_tracklets'''
        unconfirmed = []
        tracked_tracklets = []  # type: list[Tracklet]
        for track in self.tracked_tracklets:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracklets.append(track)

        ''' Step 2: First association, with appearance'''
        tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)

        # Predict the current location with Kalman
        for tracklet in tracklet_pool:
            tracklet.predict()

        # vanilla matching
        cost_matrix = self.gated_metric(tracklet_pool, detections)
        matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)

        for itracked, idet in matches:
            track = tracklet_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        '''Step 3: Second association, with IoU'''
        tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
        detection_for_iou = [detections[i] for i in u_detection]

        dists = iou_distance(tracklet_for_iou, detection_for_iou)

        matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = tracklet_for_iou[itracked]
            det = detection_for_iou[idet]
            if track.state == TrackState.Tracked:
                track.update(detection_for_iou[idet], self.frame_id)
                activated_tracklets.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_tracklets.append(track)

        for it in u_track:
            track = tracklet_for_iou[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracklets.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detection_for_iou[i] for i in u_detection]
        dists = iou_distance(unconfirmed, detections)

        matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_tracklets.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracklets.append(track)

        """ Step 4: Init new tracklets"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.frame_id)
            activated_tracklets.append(track)

        """ Step 5: Update state"""
        for track in self.lost_tracklets:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracklets.append(track)

        self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
        self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
        self.lost_tracklets.extend(lost_tracklets)
        self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
        self.removed_tracklets.extend(removed_tracklets)
        self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)

        output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]

        return output_tracklets

    def gated_metric(self, tracks, dets):
        """
        get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.

        tracks: List[STrack]
        dets: List[STrack]
        """
        appearance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
        cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets)
        return cost_matrix

    def gate_cost_matrix(self, cost_matrix, tracks, dets, max_appearance_thresh=0.15, gated_cost=1e5, only_position=False):
        """
        gate the cost matrix by the Kalman state distance, constrained by the
        0.95 confidence interval of the chi-square distribution

        cost_matrix: np.ndarray, shape (len(tracks), len(dets))
        tracks: List[STrack]
        dets: List[STrack]
        gated_cost: a very large constant assigned to infeasible associations
        only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]

        return:
            updated cost_matrix, np.ndarray
        """
        gating_dim = 2 if only_position else 4
        gating_threshold = chi2inv95[gating_dim]
        measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets])  # (len(dets), 4)

        cost_matrix[cost_matrix > max_appearance_thresh] = gated_cost
        for row, track in enumerate(tracks):
            gating_distance = track.kalman_filter.gating_distance(measurements)
            cost_matrix[row, gating_distance > gating_threshold] = gated_cost

            cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
        return cost_matrix
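

# --- Worked example (editor's note): the final row update above mixes the two
# costs as lambda * appearance + (1 - lambda) * motion (eq. 10, lambda = 0.98).
# With appearance costs [0.12, 0.10] and Mahalanobis distances [1.0, 9.0]
# (both under the gating threshold chi2inv95[4] ~ 9.49):
#   candidate 1: 0.98 * 0.12 + 0.02 * 1.0 = 0.1376
#   candidate 2: 0.98 * 0.10 + 0.02 * 9.0 = 0.278
# so a slightly better-looking but kinematically implausible match loses.
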
def joint_tracklets(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if not exists.get(tid, 0):
            exists[tid] = 1
            res.append(t)
    return res


def sub_tracklets(tlista, tlistb):
    tracklets = {}
    for t in tlista:
        tracklets[t.track_id] = t
    for t in tlistb:
        tid = t.track_id
        if tracklets.get(tid, 0):
            del tracklets[tid]
    return list(tracklets.values())


def remove_duplicate_tracklets(trackletsa, trackletsb):
    pdist = iou_distance(trackletsa, trackletsb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = list(), list()
    for p, q in zip(*pairs):
        timep = trackletsa[p].frame_id - trackletsa[p].start_frame
        timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)
    resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
    resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
    return resa, resb
366
yolov7-tracker-example/tracker/trackers/tracklet.py
Normal file
366
yolov7-tracker-example/tracker/trackers/tracklet.py
Normal file
@@ -0,0 +1,366 @@
"""
implements the base elements of a trajectory
"""

import numpy as np
from collections import deque

from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman

MOTION_MODEL_DICT = {
    'sort': SORTKalman,
    'byte': ByteKalman,
    'bot': BotKalman,
    'ocsort': OCSORTKalman,
    'strongsort': NSAKalman,
}

STATE_CONVERT_DICT = {
    'sort': 'xysa',
    'byte': 'xyah',
    'bot': 'xywh',
    'ocsort': 'xysa',
    'strongsort': 'xyah'
}


class Tracklet(BaseTrack):
    def __init__(self, tlwh, score, category, motion='byte'):

        # initial position
        self._tlwh = np.asarray(tlwh, dtype=float)  # np.float is removed in recent NumPy
        self.is_activated = False

        self.score = score
        self.category = category

        # kalman
        self.motion = motion
        self.kalman_filter = MOTION_MODEL_DICT[motion]()

        self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])

        # init kalman
        self.kalman_filter.initialize(self.convert_func(self._tlwh))

    def predict(self):
        self.kalman_filter.predict()
        self.time_since_update += 1

    def activate(self, frame_id):
        self.track_id = self.next_id()

        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        self.time_since_update = 0

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()

    def xyah_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def xywh_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    def xysa_to_tlwh(self):
        x = self.kalman_filter.kf.x
        ret = x[:4].copy()
        ret[2] = np.sqrt(x[2] * x[3])  # w = sqrt(s * a) since s = w*h, a = w/h
        ret[3] = x[2] / ret[2]         # h = s / w

        ret[:2] -= ret[2:] / 2
        return ret
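
# --- Worked example (editor's note): the tlwh <-> Kalman-state conversions
# round-trip; e.g. for tlwh = [100, 50, 40, 80] with state format 'xysa':
#   BaseTrack.tlwh_to_xysa -> [120, 90, 3200, 0.5]  (center, scale=w*h, aspect=w/h)
# and xysa_to_tlwh recovers w = sqrt(3200 * 0.5) = 40, h = 3200 / 40 = 80.
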
class Tracklet_w_reid(Tracklet):
    """
    Tracklet class with re-id features, for BoT-SORT, DeepSORT, etc.
    """

    def __init__(self, tlwh, score, category, motion='byte',
                 feat=None, feat_history=50):
        super().__init__(tlwh, score, category, motion)

        self.smooth_feat = None  # EMA feature
        self.curr_feat = None  # current feature
        self.features = deque([], maxlen=feat_history)  # all features
        if feat is not None:
            self.update_features(feat)

        self.alpha = 0.9

    def update_features(self, feat):
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
        else:
            self.kalman_filter.update(self.convert_func(new_track.tlwh))

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        if isinstance(self.kalman_filter, NSAKalman):
            self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
        else:
            self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)

        self.time_since_update = 0


class Tracklet_w_velocity(Tracklet):
    """
    Tracklet class with velocity direction, for OC-SORT.
    """

    def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
        super().__init__(tlwh, score, category, motion)

        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder
        self.observations = dict()
        self.history_observations = []
        self.velocity = None
        self.delta_t = delta_t

        self.age = 0  # mark the age

    @staticmethod
    def speed_direction(bbox1, bbox2):
        cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
        cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
        speed = np.array([cy2 - cy1, cx2 - cx1])
        norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6
        return speed / norm

    def predict(self):
        self.kalman_filter.predict()

        self.age += 1
        self.time_since_update += 1

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.time_since_update = 0

        # update velocity and history buffer
        new_tlbr = self.tlwh_to_tlbr(new_tlwh)

        if self.last_observation.sum() >= 0:  # there is a previous observation
            previous_box = None
            for i in range(self.delta_t):
                dt = self.delta_t - i
                if self.age - dt in self.observations:
                    previous_box = self.observations[self.age - dt]
                    break
            if previous_box is None:
                previous_box = self.last_observation
            """
            Estimate the track speed direction with observations \Delta t steps away
            """
            self.velocity = self.speed_direction(previous_box, new_tlbr)

        new_observation = np.r_[new_tlbr, new_track.score]
        self.last_observation = new_observation
        self.observations[self.age] = new_observation
        self.history_observations.append(new_observation)


class Tracklet_w_bbox_buffer(Tracklet):
    """
    Tracklet class with a buffer of bboxes, for C-BIoU track.
    """
    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

        # params of the motion state
        self.b1, self.b2, self.n = 0.3, 0.5, 5
        self.origin_bbox_buffer = deque()  # stores the original bboxes (tlwh) from t - self.n to t, where t is the last detected time
        self.origin_bbox_buffer.append(self._tlwh)
        # buffered bbox, two buffer sizes
        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        # motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def get_buffer_bbox(self, level=1, bbox=None):
        """
        get the buffered bbox as: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
        level = 1: b = self.b1; level = 2: b = self.b2
        bbox: if not None, use bbox to calculate the buffered bbox, else use self._tlwh
        """
        assert level in [1, 2], 'level must be 1 or 2'

        b = self.b1 if level == 1 else self.b2

        if bbox is None:
            buffer_bbox = self._tlwh + np.array([-b * self._tlwh[2], -b * self._tlwh[3], 2 * b * self._tlwh[2], 2 * b * self._tlwh[3]])
        else:
            buffer_bbox = bbox + np.array([-b * bbox[2], -b * bbox[3], 2 * b * bbox[2], 2 * b * bbox[3]])
        return np.maximum(0.0, buffer_bbox)

    def re_activate(self, new_track, frame_id, new_id=False):

        # TODO different convert
        self.kalman_filter.update(self.convert_func(new_track.tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

        self._tlwh = new_track._tlwh
        # update the stored bboxes
        if len(self.origin_bbox_buffer) > self.n:
            self.origin_bbox_buffer.popleft()
        self.origin_bbox_buffer.append(self._tlwh)

        self.buffer_bbox1 = self.get_buffer_bbox(level=1)
        self.buffer_bbox2 = self.get_buffer_bbox(level=2)
        self.motion_state1 = self.buffer_bbox1.copy()
        self.motion_state2 = self.buffer_bbox2.copy()

    def update(self, new_track, frame_id):
        self.frame_id = frame_id

        new_tlwh = new_track.tlwh
        self.score = new_track.score

        self.kalman_filter.update(self.convert_func(new_tlwh))

        self.state = TrackState.Tracked
        self.is_activated = True

        # update the stored bboxes
        if len(self.origin_bbox_buffer) > self.n:
            self.origin_bbox_buffer.popleft()
        self.origin_bbox_buffer.append(new_tlwh)

        # update the motion state; note this must read time_since_update
        # *before* it is reset, otherwise the extrapolation branch is dead code
        if self.time_since_update:  # there were some unmatched frames
            if len(self.origin_bbox_buffer) < self.n:
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
            else:  # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
                motion_state = self.origin_bbox_buffer[-1] + \
                    (self.time_since_update / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
                self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
                self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)

        else:  # no unmatched frames, use the current detection as the motion state
            self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
            self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)

        self.time_since_update = 0
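
# --- Worked example (editor's note): for a box tlwh = [100, 100, 40, 80] and
# b1 = 0.3, get_buffer_bbox(level=1) expands it to
#   [100 - 12, 100 - 24, 40 + 24, 80 + 48] = [88, 76, 64, 128],
# i.e. the same center with each side scaled by (1 + 2*b), which is what makes
# the buffered IoU tolerant to fast motion.
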
class Tracklet_w_depth(Tracklet):
    """
    tracklet with pseudo-depth info (i.e., 2000 - y2), for SparseTrack
    """

    def __init__(self, tlwh, score, category, motion='byte'):
        super().__init__(tlwh, score, category, motion)

    @property
    # @jit(nopython=True)
    def deep_vec(self):
        """Return the pseudo-depth vector `(center x, bottom y, depth)`,
        where depth = 2000 - y2.
        """
        ret = self.tlwh.copy()
        cx = ret[0] + 0.5 * ret[2]
        y2 = ret[1] + ret[3]
        length = 2000 - y2
        return np.asarray([cx, y2, length], dtype=float)  # np.float is removed in recent NumPy