This commit is contained in:
√(noham)²
2024-07-18 00:42:59 +02:00
parent 5c9313c4ca
commit 3cf13b815c
180 changed files with 34499 additions and 2 deletions

View File

@@ -0,0 +1,133 @@
import numpy as np
from collections import OrderedDict
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id():
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@property
def xywh(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2.0
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
@staticmethod
def tlwh_to_xysa(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] = tlwh[2] * tlwh[3]
ret[3] = tlwh[2] / tlwh[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
def to_xywh(self):
return self.tlwh_to_xywh(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
# @jit(nopython=True)
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
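# Illustrative sketch (added for clarity, not part of the original file): how the
# static coordinate converters compose for a box with top-left (10, 20), width 50
# and height 100. Runs only when the module is executed directly.
if __name__ == '__main__':
    _tlwh = np.array([10., 20., 50., 100.])
    assert np.allclose(BaseTrack.tlwh_to_tlbr(_tlwh), [10., 20., 60., 120.])   # corner + size -> both corners
    assert np.allclose(BaseTrack.tlwh_to_xyah(_tlwh), [35., 70., 0.5, 100.])   # center, aspect ratio w/h, height
    assert np.allclose(BaseTrack.tlwh_to_xysa(_tlwh), [35., 70., 5000., 0.5])  # center, area w*h, aspect ratio w/h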

View File

@@ -0,0 +1,329 @@
"""
BoT-SORT tracker
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class BotTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (c, 128, 256)
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, 128, 128)
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128)) # shape: (128, 128, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the detected objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# if any(tlbr_ == -1 for tlbr_ in tlwh):
# print(tlwh)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
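# BYTE-style split: detections scoring above conf_thresh drive the first association;
# detections in (0.1, conf_thresh) are kept for the second, low-score association.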
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
"""Step 1: Extract reid features"""
if self.with_reid:
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
if self.with_reid:
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
ious_dists = iou_distance(tracklet_pool, detections)
ious_dists_mask = (ious_dists > 0.5) # high conf iou
if self.with_reid:
# mixed cost matrix
emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
ious_dists = iou_distance(unconfirmed, detections)
ious_dists_mask = (ious_dists > 0.5)
if self.with_reid:
emb_dists = embedding_distance(unconfirmed, detections) / 2.0
raw_emb_dists = emb_dists.copy()
emb_dists[emb_dists > 0.25] = 1.0
emb_dists[ious_dists_mask] = 1.0
dists = np.minimum(ious_dists, emb_dists)
else:
dists = ious_dists
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
return resa, resb
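# Minimal usage sketch (added for illustration; the `args` fields are assumptions
# inferred from how BotTracker reads them, not a documented interface):
#   from types import SimpleNamespace
#   args = SimpleNamespace(conf_thresh=0.5, track_buffer=30, kalman_format='bot',
#                          discard_reid=True, reid_model=None, reid_model_path=None)
#   tracker = BotTracker(args, frame_rate=30)
#   for dets, frame in video_stream:          # dets: (N, 6) array, tlwh + score + class
#       online = tracker.update(dets, frame, frame)
#       for t in online:
#           print(t.track_id, t.tlwh)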

View File

@@ -0,0 +1,201 @@
"""
ByteTrack
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class ByteTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = iou_distance(r_tracked_tracklets, detections_second)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
return resa, resb

View File

@@ -0,0 +1,204 @@
"""
C_BIoU Track
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_bbox_buffer
from .matching import *
class C_BIoUTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = buffered_iou_distance(tracklet_pool, detections, level=1)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# associate the unmatched tracks with the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = buffered_iou_distance(unconfirmed, detections, level=1)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
return resa, resb

View File

@@ -0,0 +1,264 @@
import cv2
import numpy as np
import copy
import matplotlib.pyplot as plt
"""GMC Module"""
class GMC:
def __init__(self, method='orb', downscale=2, verbose=None):
super(GMC, self).__init__()
self.method = method
self.downscale = max(1, int(downscale))
if self.method == 'orb':
self.detector = cv2.FastFeatureDetector_create(20)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
elif self.method == 'sift':
self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.matcher = cv2.BFMatcher(cv2.NORM_L2)
elif self.method == 'ecc':
number_of_iterations = 100
termination_eps = 1e-5
self.warp_mode = cv2.MOTION_EUCLIDEAN
self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
elif self.method == 'file' or self.method == 'files':
seqName = verbose[0]
ablation = verbose[1]
if ablation:
filePath = r'tracker/GMC_files/MOT17_ablation'
else:
filePath = r'tracker/GMC_files/MOTChallenge'
if '-FRCNN' in seqName:
seqName = seqName[:-6]
elif '-DPM' in seqName:
seqName = seqName[:-4]
elif '-SDP' in seqName:
seqName = seqName[:-4]
self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')
if self.gmcFile is None:
raise ValueError("Error: Unable to open GMC file in directory:" + filePath)
elif self.method == 'none' or self.method == 'None':
self.method = 'none'
else:
raise ValueError("Error: Unknown CMC method:" + method)
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
if self.method == 'orb' or self.method == 'sift':
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
return self.applyEcc(raw_frame, detections)
elif self.method == 'file':
return self.applyFile(raw_frame, detections)
elif self.method == 'none':
return np.eye(2, 3)
else:
return np.eye(2, 3)
def applyEcc(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3, dtype=np.float32)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
# Initialization done
self.initializedFirstFrame = True
return H
# Run the ECC algorithm. The results are stored in warp_matrix.
# (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
try:
(cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
except Exception:
print('Warning: find transform failed. Set warp as identity')
# Store the current frame as the reference for the next call (otherwise the warp
# would always be estimated against the very first frame)
self.prevFrame = frame.copy()
return H
def applyFeatures(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# find the keypoints
mask = np.zeros_like(frame)
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255
if detections is not None:
for det in detections:
tlbr = (det[:4] / self.downscale).astype(np.int_)
mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
keypoints = self.detector.detect(frame, mask)
# compute the descriptors
keypoints, descriptors = self.extractor.compute(frame, keypoints)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
# Initialization done
self.initializedFirstFrame = True
return H
# Match descriptors.
knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
# Filter matches based on spatial distance to the previous keypoint
matches = []
spatialDistances = []
maxSpatialDistance = 0.25 * np.array([width, height])
# Handle empty matches case
if len(knnMatches) == 0:
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
for m, n in knnMatches:
if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
(np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
spatialDistances.append(spatialDistance)
matches.append(m)
meanSpatialDistances = np.mean(spatialDistances, 0)
stdSpatialDistances = np.std(spatialDistances, 0)
inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Draw the keypoint matches on the output image
if 0:
matches_img = np.hstack((self.prevFrame, frame))
matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
W = np.size(self.prevFrame, 1)
for m in goodMatches:
prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
curr_pt[0] += W
color = np.random.randint(0, 255, (3,))
color = (int(color[0]), int(color[1]), int(color[2]))
matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
plt.figure()
plt.imshow(matches_img)
plt.show()
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
print('Warning: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
def applyFile(self, raw_frame, detections=None):
line = self.gmcFile.readline()
tokens = line.split("\t")
H = np.eye(2, 3, dtype=np.float_)
H[0, 0] = float(tokens[1])
H[0, 1] = float(tokens[2])
H[0, 2] = float(tokens[3])
H[1, 0] = float(tokens[4])
H[1, 1] = float(tokens[5])
H[1, 2] = float(tokens[6])
return H
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""
GMC module prediction
:param stracks: List[Strack]
"""
if len(stracks) > 0:
multi_mean = np.asarray([st.kalman_filter.kf.x.copy() for st in stracks])
multi_covariance = np.asarray([st.kalman_filter.kf.P for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
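# For each track, rotate the full 8-dim state (position and velocity blocks) with the
# 2x2 rotation/scale part of the warp via the Kronecker-expanded R8x8, shift the position
# by the translation t, and rotate the covariance consistently.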
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].kalman_filter.kf.x = mean
stracks[i].kalman_filter.kf.P = cov

View File

@@ -0,0 +1,327 @@
"""
Deep Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class DeepSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # (c, 128, 256)
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
def reid_preprocess(self, obj_bbox):
"""
preprocess cropped object bboxes
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, h, w), resized to self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the detected objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scale to original size) tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
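# Matching cascade (DeepSORT): candidates are matched in order of increasing time
# since their last update, so recently confirmed tracks get priority over stale ones.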
matches, u_track, u_detection = matching_cascade(distance_metric=self.gated_metric,
matching_thresh=0.9,
cascade_depth=30,
tracks=tracklet_pool,
detections=detections
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get cost matrix: first compute the appearance cost, then gate it by the Kalman state distance.
tracks: List[STrack]
dets: List[STrack]
"""
appearance_dist = nearest_embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets, )
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by computing the Kalman state distance, constrained by the
0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]
return:
updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
return resa, resb

View File

@@ -0,0 +1,74 @@
from filterpy.kalman import KalmanFilter
import numpy as np
import scipy
class BaseKalman:
def __init__(self,
state_dim: int = 8,
observation_dim: int = 4,
F: np.ndarray = np.zeros((0, )),
P: np.ndarray = np.zeros((0, )),
Q: np.ndarray = np.zeros((0, )),
H: np.ndarray = np.zeros((0, )),
R: np.ndarray = np.zeros((0, )),
) -> None:
self.kf = KalmanFilter(dim_x=state_dim, dim_z=observation_dim, dim_u=0)
if F.shape[0] > 0: self.kf.F = F # if valid
if P.shape[0] > 0: self.kf.P = P
if Q.shape[0] > 0: self.kf.Q = Q
if H.shape[0] > 0: self.kf.H = H
if R.shape[0] > 0: self.kf.R = R
def initialize(self, observation):
raise NotImplementedError
def predict(self, ):
self.kf.predict()
def update(self, observation, **kwargs):
self.kf.update(observation)  # use the filter's own R and H by default
def get_state(self, ):
return self.kf.x
def gating_distance(self, measurements, only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
measurements : ndarray
An Nx4 dimensional matrix of N measurements, note the format (whether xywh or xyah or others)
should be identical to state definition
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
# map state space to measurement space
mean = self.kf.x.copy()
mean = np.dot(self.kf.H, mean)
covariance = np.linalg.multi_dot((self.kf.H, self.kf.P, self.kf.H.T))
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
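# Illustrative sketch (added for clarity, not part of the original file): gating a cost
# row with the 95% chi-square quantile (`chi2inv95` lives in the matching utilities).
#   d2 = kalman.gating_distance(measurements)   # squared Mahalanobis distances, shape (N,)
#   cost_row[d2 > chi2inv95[4]] = 1e5           # forbid statistically implausible pairs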

View File

@@ -0,0 +1,99 @@
from .base_kalman import BaseKalman
import numpy as np
import cv2
class BotKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, w, h, vx, vy, vw, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-w-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[2], # related to w, h
2 * self._std_weight_position * observation[3],
2 * self._std_weight_position * observation[2],
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[2],
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[2],
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-w-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[2],
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)
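# Illustrative sketch (added for clarity, not part of the original file): one
# predict/update cycle, assuming an x-y-w-h observation.
#   kf = BotKalman()
#   kf.initialize(np.array([100., 200., 50., 120.]))   # sets x_{0,0} and P_{0,0}
#   kf.predict()                                        # x_{1,0} = F x_{0,0}
#   kf.update(np.array([102., 203., 52., 118.]))        # correct with the new detection
#   state = kf.get_state()                              # 8-dim [x, y, w, h, vx, vy, vw, vh]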

View File

@@ -0,0 +1,97 @@
from .base_kalman import BaseKalman
import numpy as np
class ByteKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z):
""" update step
Args:
z: observation x-y-a-h format
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,144 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class OCSORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def _freeze(self, ):
""" freeze all the param of Kalman
"""
self.attr_saved = deepcopy(self.kf.__dict__)
def _unfreeze(self, ):
""" when observe an lost object again, use the virtual trajectory
"""
if self.attr_saved is not None:
new_history = deepcopy(self.history_obs)
self.kf.__dict__ = self.attr_saved
self.history_obs = self.history_obs[:-1]
occur = [int(d is None) for d in new_history]
indices = np.where(np.array(occur)==0)[0]
index1 = indices[-2]
index2 = indices[-1]
box1 = new_history[index1]
x1, y1, s1, r1 = box1
w1 = np.sqrt(s1 * r1)
h1 = np.sqrt(s1 / r1)
box2 = new_history[index2]
x2, y2, s2, r2 = box2
w2 = np.sqrt(s2 * r2)
h2 = np.sqrt(s2 / r2)
time_gap = index2 - index1
dx = (x2-x1)/time_gap
dy = (y2-y1)/time_gap
dw = (w2-w1)/time_gap
dh = (h2-h1)/time_gap
for i in range(index2 - index1):
"""
The default virtual trajectory generation is by linear
motion (constant speed hypothesis), you could modify this
part to implement your own.
"""
x = x1 + (i+1) * dx
y = y1 + (i+1) * dy
w = w1 + (i+1) * dw
h = h1 + (i+1) * dh
s = w * h
r = w / float(h)
new_box = np.array([x, y, s, r]).reshape((4, 1))
"""
I still use predict-update loop here to refresh the parameters,
but this can be faster by directly modifying the internal parameters
as suggested in the paper. I keep this naive but slow way for
easy read and understanding
"""
self.kf.update(new_box)
if not i == (index2-index1-1):
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly, since OC-SORT changes the internals of the Kalman filter
Args:
z: observation x-y-s-a format
"""
self.history_obs.append(z)
if z is None:
if self.observed:
self._freeze()
self.observed = False
self.kf.update(z)
else:
if not self.observed: # Get observation, use online smoothing to re-update parameters
self._unfreeze()
self.kf.update(z)
self.observed = True
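# Illustrative sketch (added for clarity, not part of the original file): how the
# freeze/unfreeze logic behaves across a short occlusion, with x-y-s-a observations z_t.
#   kf.update(z_1)     # observed
#   kf.update(None)    # lost -> parameters frozen at the last observed state
#   kf.update(None)
#   kf.update(z_4)     # re-observed -> virtual boxes interpolated between z_1 and z_4,
#                      # the filter is re-run along them, then updated with z_4 (ORU)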

View File

@@ -0,0 +1,73 @@
from .base_kalman import BaseKalman
import numpy as np
from copy import deepcopy
class SORTKalman(BaseKalman):
def __init__(self, ):
state_dim = 7 # [x, y, s, a, vx, vy, vs] s: area
observation_dim = 4
F = np.array([[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1]])
H = np.eye(state_dim // 2 + 1, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
# TODO check
# give high uncertainty to the unobservable initial velocities
self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]]
self.kf.P[4:, 4:] *= 1000
self.kf.P *= 10
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
# keep all observations
self.history_obs = []
self.attr_saved = None
self.observed = False
def initialize(self, observation):
"""
Args:
observation: x-y-s-a
"""
self.kf.x = self.kf.x.flatten()
self.kf.x[:4] = observation
def predict(self, ):
""" predict step
"""
# s + vs
if (self.kf.x[6] + self.kf.x[2] <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
def update(self, z):
""" update step
For simplicity, modify self.kf directly (the same convention as the OC-SORT variant)
Args:
z: observation x-y-s-a format
"""
self.kf.update(z)

View File

@@ -0,0 +1,101 @@
from .base_kalman import BaseKalman
import numpy as np
class NSAKalman(BaseKalman):
def __init__(self, ):
state_dim = 8 # [x, y, a, h, vx, vy, va, vh]
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initialize(self, observation):
""" init x, P, Q, R
Args:
observation: x-y-a-h format
"""
# init x, P, Q, R
mean_pos = observation
mean_vel = np.zeros_like(observation)
self.kf.x = np.r_[mean_pos, mean_vel] # x_{0, 0}
std = [
2 * self._std_weight_position * observation[3], # related to h
2 * self._std_weight_position * observation[3],
1e-2,
2 * self._std_weight_position * observation[3],
10 * self._std_weight_velocity * observation[3],
10 * self._std_weight_velocity * observation[3],
1e-5,
10 * self._std_weight_velocity * observation[3],
]
self.kf.P = np.diag(np.square(std)) # P_{0, 0}
def predict(self, ):
""" predict step
x_{n + 1, n} = F * x_{n, n}
P_{n + 1, n} = F * P_{n, n} * F^T + Q
"""
std_pos = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-2,
self._std_weight_position * self.kf.x[3]]
std_vel = [
self._std_weight_velocity * self.kf.x[3],
self._std_weight_velocity * self.kf.x[3],
1e-5,
self._std_weight_velocity * self.kf.x[3]]
Q = np.diag(np.square(np.r_[std_pos, std_vel]))
self.kf.predict(Q=Q)
def update(self, z, score):
""" update step
Args:
z: observation x-y-a-h format
score: the detection score/confidence required by NSA kalman
K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1}
x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1})
P_{n, n} = (I - K_n * H) P_{n, n - 1} (I - K_n * H)^T + K_n R_n
"""
std = [
self._std_weight_position * self.kf.x[3],
self._std_weight_position * self.kf.x[3],
1e-1,
self._std_weight_position * self.kf.x[3]]
# NSA
std = [(1. - score) * x for x in std]
R = np.diag(np.square(std))
self.kf.update(z=z, R=R)

View File

@@ -0,0 +1,27 @@
from .base_kalman import BaseKalman
import numpy as np
class UCMCKalman(BaseKalman):
def __init__(self, ):
state_dim = 8
observation_dim = 4
F = np.eye(state_dim, state_dim)
'''
[1, 0, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 0]
...
'''
for i in range(state_dim // 2):
F[i, i + state_dim // 2] = 1
H = np.eye(state_dim // 2, state_dim)
super().__init__(state_dim=state_dim,
observation_dim=observation_dim,
F=F,
H=H)
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160

View File

@@ -0,0 +1,388 @@
import cv2
import numpy as np
import scipy
import lap
from scipy.spatial.distance import cdist
import math
from cython_bbox import bbox_overlaps as bbox_ious
import time
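# Table of the 0.95 quantile of the chi-square distribution with N degrees of
# freedom (N = 1, ..., 9), used as gating thresholds on squared Mahalanobis distances.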
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
def merge_matches(m1, m2, shape):
O,P,Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1*M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
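# Illustrative sketch (added for clarity, not part of the original file): with a 2x2
# cost matrix the solver returns matched index pairs plus unmatched row/column indices.
#   costs = np.array([[0.2, 0.9],
#                     [0.8, 0.95]])
#   matches, u_rows, u_cols = linear_assignment(costs, thresh=0.7)
#   # matches -> [[0, 0]]; row 1 and column 1 stay unmatched (their costs exceed thresh)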
def ious(atlbrs, btlbrs):
"""
Compute cost based on IoU
:type atlbrs: list[tlbr] | np.ndarray
:type btlbrs: list[tlbr] | np.ndarray
:rtype ious np.ndarray
"""
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
if ious.size == 0:
return ious
ious = bbox_ious(
np.ascontiguousarray(atlbrs, dtype=np.float64),
np.ascontiguousarray(btlbrs, dtype=np.float64)
)
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def v_iou_distance(atracks, btracks):
"""
Compute cost based on IoU
:type atracks: list[STrack]
:type btracks: list[STrack]
:rtype cost_matrix np.ndarray
"""
if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='cosine'):
"""
:param tracks: list[STrack]
:param detections: list[BaseTrack]
:param metric:
:return: cost_matrix np.ndarray
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float64)
#for i, track in enumerate(tracks):
#cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float64)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Normalized features
return cost_matrix
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
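# gate out pairs whose squared Mahalanobis distance exceeds the chi-square 0.95
# quantile, then blend the appearance cost with the motion (gating) distance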
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position, metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
return cost_matrix
def fuse_iou(cost_matrix, tracks, detections):
if cost_matrix.size == 0:
return cost_matrix
reid_sim = 1 - cost_matrix
iou_dist = iou_distance(tracks, detections)
iou_sim = 1 - iou_dist
fuse_sim = reid_sim * (1 + iou_sim) / 2
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
#fuse_sim = fuse_sim * (1 + det_scores) / 2
fuse_cost = 1 - fuse_sim
return fuse_cost
def fuse_score(cost_matrix, detections):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
fuse_sim = iou_sim * det_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
def greedy_assignment_iou(dist, thresh):
matched_indices = []
if dist.shape[1] == 0:
return np.array(matched_indices, np.int32).reshape(-1, 2)
for i in range(dist.shape[0]):
j = dist[i].argmin()
if dist[i][j] < thresh:
dist[:, j] = 1.
matched_indices.append([j, i])
return np.array(matched_indices, np.int32).reshape(-1, 2)
def greedy_assignment(dists, threshs):
matches = greedy_assignment_iou(dists.T, threshs)
u_det = [d for d in range(dists.shape[1]) if not (d in matches[:, 1])]
u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])]
return matches, u_track, u_det
def fuse_score_matrix(cost_matrix, detections, tracks):
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
trk_scores = np.array([trk.score for trk in tracks])
trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1)
mid_scores = (det_scores + trk_scores) / 2
fuse_sim = iou_sim * mid_scores
fuse_cost = 1 - fuse_sim
return fuse_cost
"""
calculate buffered IoU, used in C_BIoU_Tracker
"""
def buffered_iou_distance(atracks, btracks, level=1):
"""
atracks: list[C_BIoUSTrack], tracks
btracks: list[C_BIoUSTrack], detections
level: cascade level, 1 or 2
"""
assert level in [1, 2], 'level must be 1 or 2'
if level == 1: # use motion_state1(tracks) and buffer_bbox1(detections) to calculate
atlbrs = [track.tlwh_to_tlbr(track.motion_state1) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox1) for det in btracks]
else:
atlbrs = [track.tlwh_to_tlbr(track.motion_state2) for track in atracks]
btlbrs = [det.tlwh_to_tlbr(det.buffer_bbox2) for det in btracks]
_ious = ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
"""
observation centric association, with velocity, for OC Sort
"""
def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight):
if(len(tracklets) == 0):
return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections)))
# get numpy format bboxes
trk_tlbrs = np.array([track.tlbr for track in tracklets])
det_tlbrs = np.array([det.tlbr for det in detections])
det_scores = np.array([det.score for det in detections])
iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs)
Y, X = speed_direction_batch(det_tlbrs, previous_obs)
inertia_Y, inertia_X = velocities[:,0], velocities[:,1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
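# diff_angle lies in [0, pi], so this maps direction consistency to a reward in
# [-0.5, 0.5]; well-aligned motion receives a positive bonus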
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1)
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost * scores.T
matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9)
return matches, unmatched_a, unmatched_b
"""
helper func of observation_centric_association
"""
def speed_direction_batch(dets, tracks):
tracks = tracks[..., np.newaxis]
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
dx = CX2 - CX1
dy = CY2 - CY1
norm = np.sqrt(dx**2 + dy**2) + 1e-6
dx = dx / norm
dy = dy / norm
return dy, dx # size: num_track x num_det
def matching_cascade(
distance_metric, matching_thresh, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""
Run matching cascade in DeepSORT
distance_metric: function that calculates the cost matrix
matching_thresh: float, associations with a cost larger than this value are disregarded
cascade_depth: int, equal to the max_age of a tracklet
tracks: List[STrack], current tracks
detections: List[STrack], current detections
track_indices: List[int], indices of tracks to consider, default None (all tracks)
detection_indices: List[int], indices of detections to consider, default None (all detections)
return:
matched pairs, unmatched tracks, unmatched detections: List[int], List[int], List[int]
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
detections_to_match = detection_indices
matches = []
for level in range(cascade_depth):
"""
match the most recently updated tracks with detections first
"""
if not len(detections_to_match): # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
] # filter tracks whose age is equal to level + 1 (The age of Newest track = 1)
if not len(track_indices_l): # Nothing to match at this level
continue
# tracks and detections to be matched at the current level
track_l = [tracks[idx] for idx in track_indices_l] # List[STrack]
det_l = [detections[idx] for idx in detections_to_match] # List[STrack]
# calculate the cost matrix
cost_matrix = distance_metric(track_l, det_l)
# solve the linear assignment problem
matched_row_col, unmatched_row, unmatched_col = \
linear_assignment(cost_matrix, matching_thresh)
for row, col in matched_row_col: # for those that matched
matches.append((track_indices_l[row], detections_to_match[col]))
unmatched_detection_l = [] # current detections not matched
for col in unmatched_col: # for detections not matched
unmatched_detection_l.append(detections_to_match[col])
detections_to_match = unmatched_detection_l # update detections to match for the next level
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, detections_to_match
def nearest_embedding_distance(tracks, detections, metric='cosine'):
"""
unlike embedding_distance, this function computes the minimum distance
between all of a track's historical features and the detection features
tracks: list[STrack]
detections: list[STrack]
metric: str, cosine or euclidean
TODO: support euclidean distance
return:
cost_matrix, np.ndarray, shape(len(tracks), len(detections))
"""
cost_matrix = np.zeros((len(tracks), len(detections)))
det_features = np.asarray([det.features[-1] for det in detections])
for row, track in enumerate(tracks):
track_history_features = np.asarray(track.features)
dist = 1. - cal_cosine_distance(track_history_features, det_features)
dist = dist.min(axis=0)
cost_matrix[row, :] = dist
return cost_matrix
def cal_cosine_distance(mat1, mat2):
"""
simple function computing the cosine similarity between two matrices (callers turn it into a distance via 1 - similarity)
:param mat1: np.ndarray, shape(M, dim)
:param mat2: np.ndarray, shape(N, dim)
:return: np.ndarray, shape(M, N)
"""
# result = mat1·mat2^T / |mat1|·|mat2|
# norm mat1 and mat2
mat1 = mat1 / np.linalg.norm(mat1, axis=1, keepdims=True)
mat2 = mat2 / np.linalg.norm(mat2, axis=1, keepdims=True)
return np.dot(mat1, mat2.T)

View File

@@ -0,0 +1,237 @@
"""
OC Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_velocity
from .matching import *
from cython_bbox import bbox_overlaps as bbox_ious
class OCSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
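# new tracklets are only initialized from detections scoring at least 0.1
# above the confidence threshold (a ByteTrack-style init rule)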
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.delta_t = 3
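# delta_t: how many frames to look back for the previous observation used to
# estimate a track's velocity direction (OC-SORT)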
@staticmethod
def k_previous_obs(observations, cur_age, k):
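# return the observation from k frames before cur_age if available, otherwise
# the most recent stored observation; [-1] * 5 if the track has none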
if len(observations) == 0:
return [-1, -1, -1, -1, -1]
for i in range(k):
dt = k - i
if cur_age - dt in observations:
return observations[cur_age-dt]
max_age = max(observations.keys())
return observations[max_age]
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, Observation Centric Momentum'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
velocities = np.array(
[trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in tracklet_pool])
# last observation, observation-centric
# last_boxes = np.array([trk.last_observation for trk in tracklet_pool])
# historical observations
k_observations = np.array(
[self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool])
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Observation centric cost matrix and assignment
matches, u_track, u_detection = observation_centric_association(
tracklets=tracklet_pool, detections=detections, iou_threshold=0.3,
velocities=velocities, previous_obs=k_observations, vdc_weight=0.2
)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
''' Step 3: Second association, with low score detection boxes'''
# association the untrack to the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
# for unmatched tracks in the first round, use the last observation
r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detections_second_bbox = [det.tlbr for det in detections_second]
dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox)
matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_tracklets[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = r_tracked_tracklets[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,98 @@
"""
AFLink code from StrongSORT (StrongSORT: Make DeepSORT Great Again, arXiv),
copied from the original repo
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class TemporalBlock(nn.Module):
def __init__(self, cin, cout):
super(TemporalBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (7, 1), bias=False)
self.relu = nn.ReLU(inplace=True)
self.bnf = nn.BatchNorm1d(cout)
self.bnx = nn.BatchNorm1d(cout)
self.bny = nn.BatchNorm1d(cout)
def bn(self, x):
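# apply a separate 1-D BatchNorm to each of the three trajectory channels
# (bnf / bnx / bny, presumably the frame index and the x / y coordinates)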
x[:, :, :, 0] = self.bnf(x[:, :, :, 0])
x[:, :, :, 1] = self.bnx(x[:, :, :, 1])
x[:, :, :, 2] = self.bny(x[:, :, :, 2])
return x
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class FusionBlock(nn.Module):
def __init__(self, cin, cout):
super(FusionBlock, self).__init__()
self.conv = nn.Conv2d(cin, cout, (1, 3), bias=False)
self.bn = nn.BatchNorm2d(cout)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Classifier(nn.Module):
def __init__(self, cin):
super(Classifier, self).__init__()
self.fc1 = nn.Linear(cin*2, cin//2)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Linear(cin//2, 2)
def forward(self, x1, x2):
x = torch.cat((x1, x2), dim=1)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
class PostLinker(nn.Module):
def __init__(self):
super(PostLinker, self).__init__()
self.TemporalModule_1 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.TemporalModule_2 = nn.Sequential(
TemporalBlock(1, 32),
TemporalBlock(32, 64),
TemporalBlock(64, 128),
TemporalBlock(128, 256)
)
self.FusionBlock_1 = FusionBlock(256, 256)
self.FusionBlock_2 = FusionBlock(256, 256)
self.pooling = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = Classifier(256)
def forward(self, x1, x2):
x1 = x1[:, :, :, :3]
x2 = x2[:, :, :, :3]
x1 = self.TemporalModule_1(x1) # [B,1,30,3] -> [B,256,6,3]
x2 = self.TemporalModule_2(x2)
x1 = self.FusionBlock_1(x1)
x2 = self.FusionBlock_2(x2)
x1 = self.pooling(x1).squeeze(-1).squeeze(-1)
x2 = self.pooling(x2).squeeze(-1).squeeze(-1)
y = self.classifier(x1, x2)
if not self.training:
y = torch.softmax(y, dim=1)
return y

View File

@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F
__all__ = [
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]
pretrained_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None:
num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm:
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = None
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None:
x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
if self.gate_activation is not None:
x = self.gate_activation(x)
if self.return_gates:
return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2a = LightConv3x3(mid_channels, mid_channels)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
self.IN = None
if IN:
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
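# unified aggregation gate (OSNet): the same channel-wise gate weights all four
# multi-scale streams before they are summed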
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return F.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. TPAMI, 2021.
"""
def __init__(
self,
num_classes,
blocks,
layers,
channels,
feature_dim=512,
loss='softmax',
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
self.loss = loss
self.feature_dim = feature_dim
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3])
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# fully connected layer
self.fc = self._construct_fc_layer(
self.feature_dim, channels[3], dropout_p=None
)
# identity classification layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels),
nn.AvgPool2d(2, stride=2)
)
)
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
if fc_dims is None or fc_dims < 0:
self.feature_dim = input_dim
return None
if isinstance(fc_dims, int):
fc_dims = [fc_dims]
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def forward(self, x, return_featuremaps=False):
x = self.featuremaps(x)
if return_featuremaps:
return x
v = self.global_avgpool(x)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights from "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(cached_file)
)
else:
print(
'Successfully loaded imagenet pretrained weights from "{}"'.
format(cached_file)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# standard size (width x1.0)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x1_0')
return model
def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# medium size (width x0.75)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[48, 192, 288, 384],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_75')
return model
def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# tiny size (width x0.5)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[32, 128, 192, 256],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_5')
return model
def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# very tiny size (width x0.25)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[16, 64, 96, 128],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_25')
return model
def osnet_ibn_x1_0(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
# standard size (width x1.0) + IBN layer
# Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ibn_x1_0')
return model

View File

@@ -0,0 +1,3 @@
"""
file for reid_models folder
"""

View File

@@ -0,0 +1,157 @@
"""
file for DeepSORT Re-ID model
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import logging
import torchvision.transforms as transforms
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out, is_downsample=False):
super(BasicBlock, self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(
c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self, x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y), True)
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i == 0:
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
else:
blocks += [BasicBlock(c_out, c_out), ]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751, reid=False):
super(Net, self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3, 64, 3, stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
# nn.Conv2d(32,32,3,stride=1,padding=1),
# nn.BatchNorm2d(32),
# nn.ReLU(inplace=True),
nn.MaxPool2d(3, 2, padding=1),
)
# 32 64 32
self.layer1 = make_layers(64, 64, 2, False)
# 32 64 32
self.layer2 = make_layers(64, 128, 2, True)
# 64 32 16
self.layer3 = make_layers(128, 256, 2, True)
# 128 16 8
self.layer4 = make_layers(256, 512, 2, True)
# 256 8 4
self.avgpool = nn.AvgPool2d((8, 4), 1)
# 256 1 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
# B x 128
if self.reid:
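# in re-ID mode, return the L2-normalized embedding instead of class logits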
x = x.div(x.norm(p=2, dim=1, keepdim=True))
return x
# classifier
x = self.classifier(x)
return x
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=torch.device(self.device))[
'net_dict']
self.net.load_state_dict(state_dict)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
4. to torch Tensor
5. normalize
"""
def _resize(im, size):
try:
return cv2.resize(im.astype(np.float32)/255., size)
except Exception:
print('Error: bbox has zero size, ', im.shape)
exit(0)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
if isinstance(im_crops, list):
im_batch = self._preprocess(im_crops)
else:
im_batch = im_crops
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features

View File

@@ -0,0 +1,273 @@
"""
load checkpoint file
copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
"""
from __future__ import division, print_function, absolute_import
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn
__all__ = [
'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
'open_all_layers', 'open_specified_layers', 'count_num_param',
'load_pretrained_weights'
]
def load_checkpoint(fpath):
r"""Loads checkpoint.
``UnicodeDecodeError`` is handled, so files saved with Python 2
can be read in Python 3.
Args:
fpath (str): path to checkpoint.
Returns:
dict
Examples::
>>> from torchreid.utils import load_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> checkpoint = load_checkpoint(fpath)
"""
if fpath is None:
raise ValueError('File path is None')
fpath = osp.abspath(osp.expanduser(fpath))
if not osp.exists(fpath):
raise FileNotFoundError('File is not found at "{}"'.format(fpath))
map_location = None if torch.cuda.is_available() else 'cpu'
try:
checkpoint = torch.load(fpath, map_location=map_location)
except UnicodeDecodeError:
pickle.load = partial(pickle.load, encoding="latin1")
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
checkpoint = torch.load(
fpath, pickle_module=pickle, map_location=map_location
)
except Exception:
print('Unable to load checkpoint from "{}"'.format(fpath))
raise
return checkpoint
def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
r"""Resumes training from a checkpoint.
This will load (1) model weights and (2) ``state_dict``
of optimizer if ``optimizer`` is not None.
Args:
fpath (str): path to checkpoint.
model (nn.Module): model.
optimizer (Optimizer, optional): an Optimizer.
scheduler (LRScheduler, optional): an LRScheduler.
Returns:
int: start_epoch.
Examples::
>>> from torchreid.utils import resume_from_checkpoint
>>> fpath = 'log/my_model/model.pth.tar-10'
>>> start_epoch = resume_from_checkpoint(
>>> fpath, model, optimizer, scheduler
>>> )
"""
print('Loading checkpoint from "{}"'.format(fpath))
checkpoint = load_checkpoint(fpath)
model.load_state_dict(checkpoint['state_dict'])
print('Loaded model weights')
if optimizer is not None and 'optimizer' in checkpoint.keys():
optimizer.load_state_dict(checkpoint['optimizer'])
print('Loaded optimizer')
if scheduler is not None and 'scheduler' in checkpoint.keys():
scheduler.load_state_dict(checkpoint['scheduler'])
print('Loaded scheduler')
start_epoch = checkpoint['epoch']
print('Last epoch = {}'.format(start_epoch))
if 'rank1' in checkpoint.keys():
print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
return start_epoch
def adjust_learning_rate(
optimizer,
base_lr,
epoch,
stepsize=20,
gamma=0.1,
linear_decay=False,
final_lr=0,
max_epoch=100
):
r"""Adjusts learning rate.
Deprecated.
"""
if linear_decay:
# linearly decay learning rate from base_lr to final_lr
frac_done = epoch / max_epoch
lr = frac_done*final_lr + (1.-frac_done) * base_lr
else:
# decay learning rate by gamma for every stepsize
lr = base_lr * (gamma**(epoch // stepsize))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_bn_to_eval(m):
r"""Sets BatchNorm layers to eval mode."""
# 1. no update for running mean and var
# 2. scale and shift parameters are still trainable
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
m.eval()
def open_all_layers(model):
r"""Opens all layers in model for training.
Examples::
>>> from torchreid.utils import open_all_layers
>>> open_all_layers(model)
"""
model.train()
for p in model.parameters():
p.requires_grad = True
def open_specified_layers(model, open_layers):
r"""Opens specified layers in model for training while keeping
other layers frozen.
Args:
model (nn.Module): neural net model.
open_layers (str or list): layers open for training.
Examples::
>>> from torchreid.utils import open_specified_layers
>>> # Only model.classifier will be updated.
>>> open_layers = 'classifier'
>>> open_specified_layers(model, open_layers)
>>> # Only model.fc and model.classifier will be updated.
>>> open_layers = ['fc', 'classifier']
>>> open_specified_layers(model, open_layers)
"""
if isinstance(model, nn.DataParallel):
model = model.module
if isinstance(open_layers, str):
open_layers = [open_layers]
for layer in open_layers:
assert hasattr(
model, layer
), '"{}" is not an attribute of the model, please provide the correct name'.format(
layer
)
for name, module in model.named_children():
if name in open_layers:
module.train()
for p in module.parameters():
p.requires_grad = True
else:
module.eval()
for p in module.parameters():
p.requires_grad = False
def count_num_param(model):
r"""Counts number of parameters in a model while ignoring ``self.classifier``.
Args:
model (nn.Module): network model.
Examples::
>>> from torchreid.utils import count_num_param
>>> model_size = count_num_param(model)
.. warning::
This method is deprecated in favor of
``torchreid.utils.compute_model_complexity``.
"""
warnings.warn(
'This method is deprecated and will be removed in the future.'
)
num_param = sum(p.numel() for p in model.parameters())
if isinstance(model, nn.DataParallel):
model = model.module
if hasattr(model,
'classifier') and isinstance(model.classifier, nn.Module):
# we ignore the classifier because it is unused at test time
num_param -= sum(p.numel() for p in model.classifier.parameters())
return num_param
def load_pretrained_weights(model, weight_path):
r"""Loads pretrianed weights to model.
Features::
- Incompatible layers (unmatched in name or size) will be ignored.
- Can automatically deal with keys containing "module.".
Args:
model (nn.Module): network model.
weight_path (str): path to pretrained weights.
Examples::
>>> from torchreid.utils import load_pretrained_weights
>>> weight_path = 'log/my_model/model-best.pth.tar'
>>> load_pretrained_weights(model, weight_path)
"""
checkpoint = load_checkpoint(weight_path)
if 'state_dict' in checkpoint:
state_dict = checkpoint['state_dict']
else:
state_dict = checkpoint
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(weight_path)
)
else:
print(
'Successfully loaded pretrained weights from "{}"'.
format(weight_path)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)

View File

@@ -0,0 +1,169 @@
"""
Sort
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet
from .matching import *
class SortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
if len(dets) > 0:
'''Detections'''
detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
dists = iou_distance(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 3: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 4: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,338 @@
"""
SparseTrack
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_depth
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
from .camera_motion_compensation import GMC
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class SparseTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
# camera motion compensation module
self.gmc = GMC(method='orb', downscale=2, verbose=None)
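# GMC estimates inter-frame camera motion (ORB features on a 2x downscaled
# image) and is applied below to warp the predicted track states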
def get_deep_range(self, obj, step):
col = []
for t in obj:
lend = (t.deep_vec)[2]
col.append(lend)
max_len, min_len = max(col), min(col)
if max_len != min_len:
deep_range = np.arange(min_len, max_len, (max_len - min_len + 1) / step)
if deep_range[-1] < max_len:
deep_range = np.concatenate([deep_range, np.array([max_len],)])
deep_range[0] = np.floor(deep_range[0])
deep_range[-1] = np.ceil(deep_range[-1])
else:
deep_range = [min_len,]
mask = self.get_sub_mask(deep_range, col)
return mask
def get_sub_mask(self, deep_range, col):
min_len = deep_range[0]
max_len = deep_range[-1]
if max_len == min_len:
lc = min_len
mask = []
for d in deep_range:
if d > deep_range[0] and d < deep_range[-1]:
mask.append((col >= lc) & (col < d))
lc = d
elif d == deep_range[-1]:
mask.append((col >= lc) & (col <= d))
lc = d
else:
lc = d
continue
return mask
# core function
def DCM(self, detections, tracks, activated_tracklets, refind_tracklets, levels, thresh, is_fuse):
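# Depth Cascade Matching (SparseTrack): detections and tracks are binned into
# pseudo-depth levels via get_deep_range (using the third component of deep_vec
# as a depth proxy) and matched level by level, with unmatched candidates
# carried over to the next level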
if len(detections) > 0:
det_mask = self.get_deep_range(detections, levels)
else:
det_mask = []
if len(tracks)!=0:
track_mask = self.get_deep_range(tracks, levels)
else:
track_mask = []
u_detection, u_tracks, res_det, res_track = [], [], [], []
if len(track_mask) != 0:
if len(track_mask) < len(det_mask):
for i in range(len(det_mask) - len(track_mask)):
idx = np.argwhere(det_mask[len(track_mask) + i] == True)
for idd in idx:
res_det.append(detections[idd[0]])
elif len(track_mask) > len(det_mask):
for i in range(len(track_mask) - len(det_mask)):
idx = np.argwhere(track_mask[len(det_mask) + i] == True)
for idd in idx:
res_track.append(tracks[idd[0]])
for dm, tm in zip(det_mask, track_mask):
det_idx = np.argwhere(dm == True)
trk_idx = np.argwhere(tm == True)
# search det
det_ = []
for idd in det_idx:
det_.append(detections[idd[0]])
det_ = det_ + u_detection
# search trk
track_ = []
for idt in trk_idx:
track_.append(tracks[idt[0]])
# update trk
track_ = track_ + u_tracks
dists = iou_distance(track_, det_)
matches, u_track_, u_det_ = linear_assignment(dists, thresh)
for itracked, idet in matches:
track = track_[itracked]
det = det_[idet]
if track.state == TrackState.Tracked:
track.update(det_[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
u_tracks = [track_[t] for t in u_track_]
u_detection = [det_[t] for t in u_det_]
u_tracks = u_tracks + res_track
u_detection = u_detection + res_det
else:
u_detection = detections
return activated_tracklets, refind_tracklets, u_tracks, u_detection
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), tlwh format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
inds_low = scores > 0.1
inds_high = scores < self.args.conf_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
cates = categories[remain_inds]
cates_second = categories[inds_second]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
if len(dets) > 0:
detections = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets, scores_keep, cates)]
else:
detections = []
''' Step 1: Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with high score detection boxes, depth cascade matching'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# Camera motion compensation
warp = self.gmc.apply(ori_img, dets)
self.gmc.multi_gmc(tracklet_pool, warp)
self.gmc.multi_gmc(unconfirmed, warp)
# depth cascade matching
activated_tracklets, refind_tracklets, u_track, u_detection_high = self.DCM(
detections,
tracklet_pool,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.75,
is_fuse=True)
''' Step 3: Second association, with low score detection boxes, depth cascade matching'''
if len(dets_second) > 0:
'''Detections'''
detections_second = [Tracklet_w_depth(tlwh, s, cate, motion=self.motion) for
(tlwh, s, cate) in zip(dets_second, scores_second, cates_second)]
else:
detections_second = []
r_tracked_tracklets = [t for t in u_track if t.state == TrackState.Tracked]
activated_tracklets, refind_tracklets, u_track, u_detection_sec = self.DCM(
detections_second,
r_tracked_tracklets,
activated_tracklets,
refind_tracklets,
levels=3,
thresh=0.3,
is_fuse=False)
for track in u_track:
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = u_detection_high
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Ramained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# get scores of lost tracks
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if not i in dupa]
resb = [t for i, t in enumerate(trackletsb) if not i in dupb]
return resa, resb

View File

@@ -0,0 +1,327 @@
"""
Strong Sort
"""
import numpy as np
import torch
from torchvision.ops import nms
import cv2
import torchvision.transforms as T
from .basetrack import BaseTrack, TrackState
from .tracklet import Tracklet, Tracklet_w_reid
from .matching import *
from .reid_models.OSNet import *
from .reid_models.load_model_tools import load_pretrained_weights
from .reid_models.deepsort_reid import Extractor
REID_MODEL_DICT = {
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'deepsort': Extractor
}
def load_reid_model(reid_model, reid_model_path):
if 'osnet' in reid_model:
func = REID_MODEL_DICT[reid_model]
model = func(num_classes=1, pretrained=False, )
load_pretrained_weights(model, reid_model_path)
model.cuda().eval()
elif 'deepsort' in reid_model:
model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
else:
raise NotImplementedError
return model
class StrongSortTracker(object):
def __init__(self, args, frame_rate=30):
self.tracked_tracklets = [] # type: list[Tracklet]
self.lost_tracklets = [] # type: list[Tracklet]
self.removed_tracklets = [] # type: list[Tracklet]
self.frame_id = 0
self.args = args
self.det_thresh = args.conf_thresh + 0.1
self.buffer_size = int(frame_rate / 30.0 * args.track_buffer)
self.max_time_lost = self.buffer_size
self.motion = args.kalman_format
self.with_reid = not args.discard_reid
self.reid_model, self.crop_transforms = None, None
if self.with_reid:
self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
self.crop_transforms = T.Compose([
# T.ToPILImage(),
# T.Resize(size=(256, 128)),
T.ToTensor(), # -> (c, crop_h, crop_w)
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128)
self.lambda_ = 0.98 # coefficient of the cost mix in Eq. 10 of the paper
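# i.e. cost = lambda_ * appearance_cost + (1 - lambda_) * Mahalanobis gating distance,
# applied row-wise in gate_cost_matrix() below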
def reid_preprocess(self, obj_bbox):
"""
preprocess a cropped object patch
obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
return:
torch.Tensor of shape (c, crop_h, crop_w), where (crop_w, crop_h) = self.bbox_crop_size
"""
obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c)
return self.crop_transforms(obj_bbox)
def get_feature(self, tlwhs, ori_img):
"""
get appearance features of the detected objects
tlwhs: shape (num_of_objects, 4)
ori_img: original image, np.ndarray, shape(H, W, C)
"""
obj_bbox = []
for tlwh in tlwhs:
tlwh = list(map(int, tlwh))
# limit to the legal range
tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0)
tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
obj_bbox.append(tlbr_tensor)
if not obj_bbox:
return np.array([])
obj_bbox = torch.stack(obj_bbox, dim=0)
obj_bbox = obj_bbox.cuda()
features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
return features.cpu().detach().numpy()
def update(self, output_results, img, ori_img):
"""
output_results: processed detections (scaled to the original image size), in tlbr format
"""
self.frame_id += 1
activated_tracklets = []
refind_tracklets = []
lost_tracklets = []
removed_tracklets = []
scores = output_results[:, 4]
bboxes = output_results[:, :4]
categories = output_results[:, -1]
remain_inds = scores > self.args.conf_thresh
dets = bboxes[remain_inds]
cates = categories[remain_inds]
scores_keep = scores[remain_inds]
features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
if len(dets) > 0:
'''Detections'''
detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
(tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
else:
detections = []
''' Add newly detected tracklets to tracked_tracklets'''
unconfirmed = []
tracked_tracklets = [] # type: list[Tracklet]
for track in self.tracked_tracklets:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_tracklets.append(track)
''' Step 2: First association, with appearance'''
tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets)
# Predict the current location with Kalman
for tracklet in tracklet_pool:
tracklet.predict()
# vanilla matching
cost_matrix = self.gated_metric(tracklet_pool, detections)
matches, u_track, u_detection = linear_assignment(cost_matrix, thresh=0.9)
for itracked, idet in matches:
track = tracklet_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
'''Step 3: Second association, with iou'''
tracklet_for_iou = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked]
detection_for_iou = [detections[i] for i in u_detection]
dists = iou_distance(tracklet_for_iou, detection_for_iou)
matches, u_track, u_detection = linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = tracklet_for_iou[itracked]
det = detection_for_iou[idet]
if track.state == TrackState.Tracked:
track.update(detection_for_iou[idet], self.frame_id)
activated_tracklets.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_tracklets.append(track)
for it in u_track:
track = tracklet_for_iou[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracklets.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detection_for_iou[i] for i in u_detection]
dists = iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_tracklets.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_tracklets.append(track)
""" Step 4: Init new tracklets"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.frame_id)
activated_tracklets.append(track)
""" Step 5: Update state"""
for track in self.lost_tracklets:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracklets.append(track)
# print('Remained match {} s'.format(t4-t3))
self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked]
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets)
self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets)
self.lost_tracklets.extend(lost_tracklets)
self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets)
self.removed_tracklets.extend(removed_tracklets)
self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets)
# output only the activated tracklets
output_tracklets = [track for track in self.tracked_tracklets if track.is_activated]
return output_tracklets
def gated_metric(self, tracks, dets):
"""
get the cost matrix: first compute the appearance cost, then gate it by the Kalman state.
tracks: List[STrack]
dets: List[STrack]
"""
appearance_dist = embedding_distance(tracks=tracks, detections=dets, metric='cosine')
cost_matrix = self.gate_cost_matrix(appearance_dist, tracks, dets)
return cost_matrix
def gate_cost_matrix(self, cost_matrix, tracks, dets, max_apperance_thresh=0.15, gated_cost=1e5, only_position=False):
"""
gate the cost matrix by computing the Kalman state (Mahalanobis) distance, constrained by
the 0.95 confidence interval of the chi-square distribution
cost_matrix: np.ndarray, shape (len(tracks), len(dets))
tracks: List[STrack]
dets: List[STrack]
gated_cost: a very large constant assigned to infeasible associations
only_position: use [xc, yc, a, h] as the state vector, or only [xc, yc]
return:
the updated cost_matrix, np.ndarray
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
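# chi2inv95 is presumably the standard 0.95 chi-square quantile table used in DeepSORT-style
# gating, e.g. ~5.99 for 2 degrees of freedom and ~9.49 for 4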
measurements = np.asarray([Tracklet.tlwh_to_xyah(det.tlwh) for det in dets]) # (len(dets), 4)
cost_matrix[cost_matrix > max_apperance_thresh] = gated_cost
for row, track in enumerate(tracks):
gating_distance = track.kalman_filter.gating_distance(measurements, )
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
cost_matrix[row] = self.lambda_ * cost_matrix[row] + (1 - self.lambda_) * gating_distance
return cost_matrix
def joint_tracklets(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_tracklets(tlista, tlistb):
tracklets = {}
for t in tlista:
tracklets[t.track_id] = t
for t in tlistb:
tid = t.track_id
if tracklets.get(tid, 0):
del tracklets[tid]
return list(tracklets.values())
def remove_duplicate_tracklets(trackletsa, trackletsb):
pdist = iou_distance(trackletsa, trackletsb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = trackletsa[p].frame_id - trackletsa[p].start_frame
timeq = trackletsb[q].frame_id - trackletsb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(trackletsa) if i not in dupa]
resb = [t for i, t in enumerate(trackletsb) if i not in dupb]
return resa, resb

View File

@@ -0,0 +1,366 @@
"""
implements base elements of trajectory
"""
import numpy as np
from collections import deque
from .basetrack import BaseTrack, TrackState
from .kalman_filters.bytetrack_kalman import ByteKalman
from .kalman_filters.botsort_kalman import BotKalman
from .kalman_filters.ocsort_kalman import OCSORTKalman
from .kalman_filters.sort_kalman import SORTKalman
from .kalman_filters.strongsort_kalman import NSAKalman
MOTION_MODEL_DICT = {
'sort': SORTKalman,
'byte': ByteKalman,
'bot': BotKalman,
'ocsort': OCSORTKalman,
'strongsort': NSAKalman,
}
STATE_CONVERT_DICT = {
'sort': 'xysa',
'byte': 'xyah',
'bot': 'xywh',
'ocsort': 'xysa',
'strongsort': 'xyah'
}
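# state layouts used by the Kalman filters:
#   'xyah' = (center x, center y, aspect ratio w/h, height)
#   'xywh' = (center x, center y, width, height)
#   'xysa' = (center x, center y, area w*h, aspect ratio w/h)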
class Tracklet(BaseTrack):
def __init__(self, tlwh, score, category, motion='byte'):
# initial position
self._tlwh = np.asarray(tlwh, dtype=float)  # builtin float; np.float is removed in modern NumPy
self.is_activated = False
self.score = score
self.category = category
# kalman
self.motion = motion
self.kalman_filter = MOTION_MODEL_DICT[motion]()
self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion])
# init kalman
self.kalman_filter.initialize(self.convert_func(self._tlwh))
def predict(self):
self.kalman_filter.predict()
self.time_since_update += 1
def activate(self, frame_id):
self.track_id = self.next_id()
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')()
def xyah_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def xywh_to_tlwh(self, ):
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
def xysa_to_tlwh(self, ):
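# the state is (xc, yc, s, a) with s = w * h and a = w / h, hence w = sqrt(s * a) and h = s / w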
x = self.kalman_filter.kf.x
ret = x[:4].copy()
ret[2] = np.sqrt(x[2] * x[3])
ret[3] = x[2] / ret[2]
ret[:2] -= ret[2:] / 2
return ret
class Tracklet_w_reid(Tracklet):
"""
Tracklet class with reid features, for botsort, deepsort, etc.
"""
def __init__(self, tlwh, score, category, motion='byte',
feat=None, feat_history=50):
super().__init__(tlwh, score, category, motion)
self.smooth_feat = None # EMA feature
self.curr_feat = None # current feature
self.features = deque([], maxlen=feat_history) # all features
if feat is not None:
self.update_features(feat)
self.alpha = 0.9
def update_features(self, feat):
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_track.tlwh), new_track.score)
else:
self.kalman_filter.update(self.convert_func(new_track.tlwh))
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
if isinstance(self.kalman_filter, NSAKalman):
self.kalman_filter.update(self.convert_func(new_tlwh), self.score)
else:
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
self.time_since_update = 0
class Tracklet_w_velocity(Tracklet):
"""
Tracklet class with velocity and observation history, for ocsort.
"""
def __init__(self, tlwh, score, category, motion='byte', delta_t=3):
super().__init__(tlwh, score, category, motion)
self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
self.observations = dict()
self.history_observations = []
self.velocity = None
self.delta_t = delta_t
self.age = 0 # mark the age
@staticmethod
def speed_direction(bbox1, bbox2):
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
speed = np.array([cy2 - cy1, cx2 - cx1])
norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
return speed / norm
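# speed_direction returns the normalized (dy, dx) between the two box centers; OC-SORT-style
# association can use this direction as a velocity-consistency cue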
def predict(self):
self.kalman_filter.predict()
self.age += 1
self.time_since_update += 1
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
# update velocity and history buffer
new_tlbr = self.tlwh_to_tlbr(new_tlwh)
if self.last_observation.sum() >= 0: # there is a previous observation (the placeholder is all -1)
previous_box = None
for i in range(self.delta_t):
dt = self.delta_t - i
if self.age - dt in self.observations:
previous_box = self.observations[self.age-dt]
break
if previous_box is None:
previous_box = self.last_observation
"""
Estimate the track speed direction with observations \Delta t steps away
"""
self.velocity = self.speed_direction(previous_box, new_tlbr)
new_observation = np.r_[new_tlbr, new_track.score]
self.last_observation = new_observation
self.observations[self.age] = new_observation
self.history_observations.append(new_observation)
class Tracklet_w_bbox_buffer(Tracklet):
"""
Tracklet class with buffer of bbox, for C_BIoU track.
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
# params in motion state
self.b1, self.b2, self.n = 0.3, 0.5, 5
self.origin_bbox_buffer = deque() # stores the original bboxes (tlwh) from t - self.n to t, where t is the last frame the track was detected
self.origin_bbox_buffer.append(self._tlwh)
# buffered bbox, two buffer sizes
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
# motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n})
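# e.g. with n = 5 and delta = 2 unmatched frames, the box is pushed forward by 2/5 of the
# displacement between the oldest and newest boxes in the buffer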
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def get_buffer_bbox(self, level=1, bbox=None):
"""
get the buffered bbox: (x, y, w, h) -> (x - b*w, y - b*h, w + 2*b*w, h + 2*b*h)
level = 1: b = self.b1; level = 2: b = self.b2
bbox: if not None, use bbox to calculate buffer_bbox, else use self._tlwh
"""
assert level in [1, 2], 'level must be 1 or 2'
b = self.b1 if level == 1 else self.b2
if bbox is None:
buffer_bbox = self._tlwh + np.array([-b*self._tlwh[2], -b*self._tlwh[3], 2*b*self._tlwh[2], 2*b*self._tlwh[3]])
else:
buffer_bbox = bbox + np.array([-b*bbox[2], -b*bbox[3], 2*b*bbox[2], 2*b*bbox[3]])
return np.maximum(0.0, buffer_bbox)
def re_activate(self, new_track, frame_id, new_id=False):
# TODO different convert
self.kalman_filter.update(self.convert_func(new_track.tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self._tlwh = new_track._tlwh
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(self._tlwh)
else:
self.origin_bbox_buffer.append(self._tlwh)
self.buffer_bbox1 = self.get_buffer_bbox(level=1)
self.buffer_bbox2 = self.get_buffer_bbox(level=2)
self.motion_state1 = self.buffer_bbox1.copy()
self.motion_state2 = self.buffer_bbox2.copy()
def update(self, new_track, frame_id):
self.frame_id = frame_id
new_tlwh = new_track.tlwh
self.score = new_track.score
self.kalman_filter.update(self.convert_func(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.time_since_update = 0
# update stored bbox
if (len(self.origin_bbox_buffer) > self.n):
self.origin_bbox_buffer.popleft()
self.origin_bbox_buffer.append(new_tlwh)
else:
self.origin_bbox_buffer.append(new_tlwh)
# update motion state
if self.time_since_update: # have some unmatched frames
if len(self.origin_bbox_buffer) < self.n:
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
else: # s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n})
motion_state = self.origin_bbox_buffer[-1] + \
(self.time_since_update / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0])
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state)
else: # no unmatched frames, use current detection as motion state
self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh)
self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh)
class Tracklet_w_depth(Tracklet):
"""
tracklet with depth info (i.e., 2000 - y2), for SparseTrack
"""
def __init__(self, tlwh, score, category, motion='byte'):
super().__init__(tlwh, score, category, motion)
@property
# @jit(nopython=True)
def deep_vec(self):
"""Convert bounding box to format `((top left, bottom right)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
cx = ret[0] + 0.5 * ret[2]
y2 = ret[1] + ret[3]
length = 2000 - y2
return np.asarray([cx, y2, length], dtype=float)