Source code for torchreid.data.datasets.video.dukemtmcvidreid

from __future__ import division, print_function, absolute_import
import glob
import os.path as osp
import warnings

from torchreid.utils import read_json, write_json

from ..dataset import VideoDataset


[docs]class DukeMTMCVidReID(VideoDataset): """DukeMTMCVidReID. Reference: - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. - Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person Re-Identification by Stepwise Learning. CVPR 2018. URL: `<https://github.com/Yu-Wu/DukeMTMC-VideoReID>`_ Dataset statistics: - identities: 702 (train) + 702 (test). - tracklets: 2196 (train) + 2636 (test). """ dataset_dir = 'dukemtmc-vidreid' dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-VideoReID.zip' def __init__(self, root='', min_seq_len=0, **kwargs): self.root = osp.abspath(osp.expanduser(root)) self.dataset_dir = osp.join(self.root, self.dataset_dir) self.download_dataset(self.dataset_dir, self.dataset_url) self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/train') self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/query') self.gallery_dir = osp.join( self.dataset_dir, 'DukeMTMC-VideoReID/gallery' ) self.split_train_json_path = osp.join( self.dataset_dir, 'split_train.json' ) self.split_query_json_path = osp.join( self.dataset_dir, 'split_query.json' ) self.split_gallery_json_path = osp.join( self.dataset_dir, 'split_gallery.json' ) self.min_seq_len = min_seq_len required_files = [ self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir ] self.check_before_run(required_files) train = self.process_dir( self.train_dir, self.split_train_json_path, relabel=True ) query = self.process_dir( self.query_dir, self.split_query_json_path, relabel=False ) gallery = self.process_dir( self.gallery_dir, self.split_gallery_json_path, relabel=False ) super(DukeMTMCVidReID, self).__init__(train, query, gallery, **kwargs) def process_dir(self, dir_path, json_path, relabel): if osp.exists(json_path): split = read_json(json_path) return split['tracklets'] print('=> Generating split json file (** this might take a while **)') pdirs = glob.glob(osp.join(dir_path, '*')) # avoid .DS_Store print( 'Processing "{}" with {} person identities'.format( dir_path, len(pdirs) ) ) pid_container = set() for pdir in pdirs: pid = int(osp.basename(pdir)) pid_container.add(pid) pid2label = {pid: label for label, pid in enumerate(pid_container)} tracklets = [] for pdir in pdirs: pid = int(osp.basename(pdir)) if relabel: pid = pid2label[pid] tdirs = glob.glob(osp.join(pdir, '*')) for tdir in tdirs: raw_img_paths = glob.glob(osp.join(tdir, '*.jpg')) num_imgs = len(raw_img_paths) if num_imgs < self.min_seq_len: continue img_paths = [] for img_idx in range(num_imgs): # some tracklet starts from 0002 instead of 0001 img_idx_name = 'F' + str(img_idx + 1).zfill(4) res = glob.glob( osp.join(tdir, '*' + img_idx_name + '*.jpg') ) if len(res) == 0: warnings.warn( 'Index name {} in {} is missing, skip'.format( img_idx_name, tdir ) ) continue img_paths.append(res[0]) img_name = osp.basename(img_paths[0]) if img_name.find('_') == -1: # old naming format: 0001C6F0099X30823.jpg camid = int(img_name[5]) - 1 else: # new naming format: 0001_C6_F0099_X30823.jpg camid = int(img_name[6]) - 1 img_paths = tuple(img_paths) tracklets.append((img_paths, pid, camid)) print('Saving split to {}'.format(json_path)) split_dict = {'tracklets': tracklets} write_json(split_dict, json_path) return tracklets