Spaces:

kzielins
/

MotionBERT

Configuration error

MotionBERT / lib /data /dataset_action.py

kzielins

motion bert project structure added

dbf90d0 over 1 year ago

7.92 kB

	import torch
	import numpy as np
	import os
	import random
	import copy
	from torch.utils.data import Dataset, DataLoader
	from lib.utils.utils_data import crop_scale, resample
	from lib.utils.tools import read_pkl

	def get_action_names(file_path = "data/action/ntu_actions.txt"):
	    """Read action names from a text file holding one ``<index>. <name>`` entry per line.

	    Each non-blank line is split on '.', and the second piece, minus its
	    first character (the space that follows the dot), is taken as the name.
	    Blank lines, such as the empty entry produced by a trailing newline,
	    are skipped instead of raising IndexError.

	    Args:
	        file_path: path of the action list file.

	    Returns:
	        List of action-name strings, in file order.

	    Raises:
	        IndexError: if a non-blank line contains no '.'.
	    """
	    # The context manager closes the handle even when reading fails.
	    with open(file_path, "r") as f:
	        lines = f.read().split('\n')
	    return [line.split('.')[1][1:] for line in lines if line.strip()]

	def make_cam(x, img_shape):
	    '''
	    Rescale coordinates by the longer image side: a value of 0 maps to -1
	    and a value equal to the longer side maps to 1.

	    Input: x (M x T x V x C)
	           img_shape (height, width)
	    Output: x / longer_side * 2 - 1
	    '''
	    height, width = img_shape
	    # Both axes share one divisor, so the aspect ratio is preserved.
	    longer_side = width if width >= height else height
	    return x / longer_side * 2 - 1

	def coco2h36m(x):
	    '''
	    Re-order COCO joints (axis 2) into the H36M joint layout.

	    Input: x (M x T x V x C)
	    Output: float64 array with the same shape as x.

	    COCO: {0-nose 1-Leye 2-Reye 3-Lear 4-Rear 5-Lsho 6-Rsho 7-Lelb 8-Relb
	           9-Lwri 10-Rwri 11-Lhip 12-Rhip 13-Lkne 14-Rkne 15-Lank 16-Rank}

	    H36M: {0-root 1-rhip 2-rkne 3-rank 4-lhip 5-lkne 6-lank 7-belly 8-neck
	           9-nose 10-head 11-lsho 12-lelb 13-lwri 14-rsho 15-relb 16-rwri}

	    root, neck, belly and head have no COCO counterpart and are built as
	    midpoints of other joints.
	    '''
	    out = np.zeros(x.shape)

	    # Joints that exist in both skeletons: out[h36m_ids[k]] <- x[coco_ids[k]].
	    h36m_ids = [1, 2, 3, 4, 5, 6, 9, 11, 12, 13, 14, 15, 16]
	    coco_ids = [12, 14, 16, 11, 13, 15, 0, 5, 7, 9, 6, 8, 10]
	    out[:, :, h36m_ids, :] = x[:, :, coco_ids, :]

	    # Synthesized joints (belly depends on root and neck, so it comes after them).
	    out[:, :, 0, :] = (x[:, :, 11, :] + x[:, :, 12, :]) * 0.5      # root: mid-hip
	    out[:, :, 8, :] = (x[:, :, 5, :] + x[:, :, 6, :]) * 0.5        # neck: mid-shoulder
	    out[:, :, 7, :] = (out[:, :, 0, :] + out[:, :, 8, :]) * 0.5    # belly: mid root/neck
	    out[:, :, 10, :] = (x[:, :, 1, :] + x[:, :, 2, :]) * 0.5       # head: mid-eye
	    return out

	def random_move(data_numpy,
	                angle_range=[-10., 10.],
	                scale_range=[0.9, 1.1],
	                transform_range=[-0.1, 0.1],
	                move_time_candidate=[1]):
	    """Apply a random, temporally smooth 2D rotation/scale/translation to a motion clip.

	    Random parameters are drawn at a few key frames and linearly
	    interpolated in between, so the transform drifts smoothly over time.
	    Only channels 0 and 1 (x, y) are transformed; any further channels are
	    returned unchanged.

	    The input array is NOT modified: the transform is applied to a copy.
	    (Transposing yields a view, so writing into it without copying would
	    overwrite the caller's data.)

	    Args:
	        data_numpy: array laid out as (M, T, V, C) with C >= 2.
	        angle_range: [min, max] rotation angle in degrees.
	        scale_range: [min, max] isotropic scale factor.
	        transform_range: [min, max] translation, used for both x and y.
	        move_time_candidate: candidate numbers of segments; one is picked
	            with ``random.choice`` and key frames are spaced T / move_time apart.

	    Returns:
	        New array of the same shape and dtype as ``data_numpy``.
	    """
	    # Copy first so the in-place frame updates below never reach the caller's array.
	    data_numpy = np.transpose(np.array(data_numpy), (3,1,2,0)) # M,T,V,C-> C,T,V,M
	    C, T, V, M = data_numpy.shape
	    move_time = random.choice(move_time_candidate)
	    # Key-frame positions, with T appended as the closing boundary.
	    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
	    node = np.append(node, T)
	    num_node = len(node)
	    A = np.random.uniform(angle_range[0], angle_range[1], num_node)
	    S = np.random.uniform(scale_range[0], scale_range[1], num_node)
	    T_x = np.random.uniform(transform_range[0], transform_range[1], num_node)
	    T_y = np.random.uniform(transform_range[0], transform_range[1], num_node)
	    a = np.zeros(T)
	    s = np.zeros(T)
	    t_x = np.zeros(T)
	    t_y = np.zeros(T)
	    # Interpolate every parameter linearly between consecutive key frames.
	    for i in range(num_node - 1):
	        start, stop = node[i], node[i + 1]
	        span = stop - start
	        a[start:stop] = np.linspace(A[i], A[i + 1], span) * np.pi / 180  # degrees -> radians
	        s[start:stop] = np.linspace(S[i], S[i + 1], span)
	        t_x[start:stop] = np.linspace(T_x[i], T_x[i + 1], span)
	        t_y[start:stop] = np.linspace(T_y[i], T_y[i + 1], span)
	    # Per-frame scaled rotation matrices, shape (2, 2, T).
	    theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
	                      [np.sin(a) * s, np.cos(a) * s]])
	    # perform transformation
	    for i_frame in range(T):
	        xy = data_numpy[0:2, i_frame, :, :]
	        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
	        new_xy[0] += t_x[i_frame]
	        new_xy[1] += t_y[i_frame]
	        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
	    data_numpy = np.transpose(data_numpy, (3,1,2,0)) # C,T,V,M -> M,T,V,C
	    return data_numpy

	def human_tracking(x):
	    '''
	    Keep the first two person slots temporally consistent.

	    Input: x (M x T x V x C)

	    For every frame t >= 1 the pose in slot 0 is compared with the poses
	    in slot 0 and slot 1 at frame t-1 (summed per-joint distance). Each
	    time slot 1 is the closer predecessor the "swapped" state toggles, and
	    while it is on, slots 0 and 1 are exchanged in the output.

	    A single-person input (M == 1) is returned as is (same object).
	    Otherwise a new float64 array is returned; only slots 0 and 1 are
	    filled, any further slots stay zero.
	    '''
	    num_person, _ = x.shape[:2]
	    if num_person == 1:
	        return x

	    first, second = x[0], x[1]
	    # (T-1, V, C) -> (T-1): cost of slot 0 continuing from slot 0 vs. from slot 1.
	    cost_keep = np.sum(np.linalg.norm(first[1:] - first[:-1], axis=-1), axis=-1)
	    cost_swap = np.sum(np.linalg.norm(first[1:] - second[:-1], axis=-1), axis=-1)
	    # Parity of the running count of detected switches: 1 = slots currently exchanged.
	    swapped = (np.cumsum(cost_keep > cost_swap) % 2)[:, None, None]

	    tracked = np.zeros(x.shape)
	    tracked[0][0] = first[0]
	    tracked[1][0] = second[0]
	    tracked[0, 1:] = second[1:] * swapped + first[1:] * (1 - swapped)
	    tracked[1, 1:] = first[1:] * swapped + second[1:] * (1 - swapped)
	    return tracked

	class ActionDataset(Dataset):
	    """Base dataset that loads skeleton annotations from a pickle and pre-processes them.

	    The pickle is expected to hold ``dataset['annotations']`` (a sequence of
	    samples with the keys 'frame_dir', 'total_frames', 'keypoint',
	    'img_shape', 'keypoint_score' and 'label') and, when ``check_split`` is
	    true, ``dataset['split'][data_split]`` listing the 'frame_dir' values
	    that belong to the split.

	    Each kept sample goes through ``make_cam`` -> ``human_tracking`` ->
	    ``coco2h36m``, is indexed along the time axis with the ids returned by
	    ``resample``, and gets its keypoint scores appended as the last
	    channel. Single-person samples are padded with an all-zero second person.

	    Subclasses must implement ``__getitem__``.
	    """

	    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=True): # data_split: train/test etc.
	        """
	        Args:
	            data_path: path handed to ``read_pkl``.
	            data_split: split name; if it contains "train" (and not
	                "oneshot") resampling is randomized.
	            n_frames: target clip length passed to ``resample``.
	            random_move: stored on the instance for use by subclasses.
	            scale_range: stored on the instance for use by subclasses.
	            check_split: when true, keep only samples listed in the split.
	        """
	        # NOTE: reseeds NumPy's global RNG as a side effect.
	        np.random.seed(0)
	        dataset = read_pkl(data_path)
	        if check_split:
	            assert data_split in dataset['split'].keys()
	            self.split = dataset['split'][data_split]
	            # Set lookup avoids a linear scan of the split list for every annotation.
	            split_lookup = set(self.split)
	        annotations = dataset['annotations']
	        self.random_move = random_move
	        self.is_train = "train" in data_split or (check_split==False)
	        if "oneshot" in data_split:
	            self.is_train = False
	        self.scale_range = scale_range
	        motions = []
	        labels = []
	        for sample in annotations:
	            if check_split and sample['frame_dir'] not in split_lookup:
	                continue
	            resample_id = resample(ori_len=sample['total_frames'], target_len=n_frames, randomness=self.is_train)
	            motion_cam = make_cam(x=sample['keypoint'], img_shape=sample['img_shape'])
	            motion_cam = human_tracking(motion_cam)
	            motion_cam = coco2h36m(motion_cam)
	            motion_conf = sample['keypoint_score'][..., None]
	            # Coordinates and confidence are concatenated on the channel axis.
	            motion = np.concatenate((motion_cam[:,resample_id], motion_conf[:,resample_id]), axis=-1)
	            if motion.shape[0]==1: # Single person, make a fake zero person
	                fake = np.zeros(motion.shape)
	                motion = np.concatenate((motion, fake), axis=0)
	            motions.append(motion.astype(np.float32))
	            labels.append(sample['label'])
	        self.motions = np.array(motions)
	        self.labels = np.array(labels)

	    def __len__(self):
	        'Denotes the total number of samples'
	        return len(self.motions)

	    def __getitem__(self, index):
	        raise NotImplementedError

	class NTURGBD(ActionDataset):
	    """Action dataset whose items are (motion, label) pairs with optional augmentation."""

	    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1]):
	        super().__init__(data_path, data_split, n_frames, random_move, scale_range)

	    def __getitem__(self, idx):
	        'Generates one sample of data'
	        sample = self.motions[idx] # (M,T,J,C)
	        label = self.labels[idx]
	        # Optional augmentation: random smooth 2D transform, then crop/scale.
	        if self.random_move:
	            sample = random_move(sample)
	        if self.scale_range:
	            sample = crop_scale(sample, scale_range=self.scale_range)
	        return sample.astype(np.float32), label

	class NTURGBD1Shot(ActionDataset):
	    """Variant that drops the held-out one-shot classes and renumbers the remaining labels.

	    After the base class has loaded the data, every sample whose label is
	    one of the 20 one-shot classes (0, 6, ..., 114) is removed and the
	    remaining labels out of ``range(120)`` are mapped to 0..99 in
	    ascending order of the original class id.
	    """

	    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=False):
	        super(NTURGBD1Shot, self).__init__(data_path, data_split, n_frames, random_move, scale_range, check_split)
	        oneshot_classes = [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114]
	        held_out = set(oneshot_classes)
	        # sorted() pins the old->new mapping; enumerating the bare set would
	        # make the label ids depend on set iteration order.
	        new_classes = sorted(set(range(120)) - held_out)
	        old2new = {cid: i for i, cid in enumerate(new_classes)}
	        # Boolean mask over samples: True where the label is not held out.
	        keep = np.array([x not in held_out for x in self.labels], dtype=bool)
	        self.motions = self.motions[keep]
	        filtered_labels = self.labels[keep]
	        self.labels = [old2new[x] for x in filtered_labels]

	    def __getitem__(self, idx):
	        'Generates one sample of data'
	        motion, label = self.motions[idx], self.labels[idx] # (M,T,J,C)
	        # Optional augmentation: random smooth 2D transform, then crop/scale.
	        if self.random_move:
	            motion = random_move(motion)
	        if self.scale_range:
	            result = crop_scale(motion, scale_range=self.scale_range)
	        else:
	            result = motion
	        return result.astype(np.float32), label