# connecting_the_dots/data/create_syn_data.py
import numpy as np
import matplotlib.pyplot as plt
import itertools
import pickle
from pathlib import Path
import multiprocessing
import time
import json
import cv2
import os
import collections
import sys
import xmltodict
sys.path.append('../')
import renderer
import co
from commons import get_patterns, get_rotation_matrix
from lcn import lcn


def get_objs(shapenet_dir, obj_classes, num_perclass=100):
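    # map class names to their ShapeNet synset IDs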
    shapenet = {'chair': '03001627',
                'airplane': '02691156',
                'car': '02958343',
                'watercraft': '04530566'}

    obj_paths = []
    for cls in obj_classes:
        if cls not in shapenet.keys():
            raise Exception('unknown class name')
        ids = shapenet[cls]
        obj_path = sorted(Path(f'{shapenet_dir}/{ids}').glob('**/models/*.obj'))
        obj_paths += obj_path[:num_perclass]
    print(f'found {len(obj_paths)} object paths')

    objs = []
    for obj_path in obj_paths:
        print(f'load {obj_path}')
        v, f, _, n = co.io3d.read_obj(obj_path)
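        # normalize the mesh: scale so the longest bounding-box side has
        # length 2, then shift so every coordinate starts at -1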
        diffs = v.max(axis=0) - v.min(axis=0)
        v /= (0.5 * diffs.max())
        v -= (v.min(axis=0) + 1)
        f = f.astype(np.int32)
        objs.append((v, f, n))
    print(f'loaded {len(objs)} objects')
    return objs


def get_mesh(rng, min_z=0):
    # set up background board
    verts, faces, normals, colors = [], [], [], []
    v, f, n = co.geometry.xyplane(z=0, interleaved=True)
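    # place the board 2 to 7 units in front of the camera, then stretch it
    # so it fills the background from every view point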
    v[:, 2] += -v[:, 2].min() + rng.uniform(2, 7)
    v[:, :2] *= 5e2
    v[:, 2] = np.mean(v[:, 2]) + (v[:, 2] - np.mean(v[:, 2])) * 5e2
    c = np.empty_like(v)
    c[:] = rng.uniform(0, 1, size=(3,)).astype(np.float32)
    verts.append(v)
    faces.append(f)
    normals.append(n)
    colors.append(c)
    # randomly sample 4 foreground objects for each scene
    for shape_idx in range(4):
        v, f, n = objs[rng.randint(0, len(objs))]
        v, f, n = v.copy(), f.copy(), n.copy()
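        # objs is the module-level list loaded in __main__; apply a random
        # scale, rotation, and translation to place the object in the scene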
        s = rng.uniform(0.25, 1)
        v *= s
        R = co.geometry.rotm_from_quat(co.geometry.quat_random(rng=rng))
        v = v @ R.T
        n = n @ R.T
        v[:, 2] += -v[:, 2].min() + min_z + rng.uniform(0.5, 3)
        v[:, :2] += rng.uniform(-1, 1, size=(1, 2))

        c = np.empty_like(v)
        c[:] = rng.uniform(0, 1, size=(3,)).astype(np.float32)

        verts.append(v.astype(np.float32))
        faces.append(f)
        normals.append(n)
        colors.append(c)

    verts, faces = co.geometry.stack_mesh(verts, faces)
    normals = np.vstack(normals).astype(np.float32)
    colors = np.vstack(colors).astype(np.float32)
    return verts, faces, colors, normals


def create_data(out_root, idx, n_samples, imsize, patterns, K, baseline,
                blend_im, noise, track_length=4):
    tic = time.time()
    rng = np.random.RandomState()
    rng.seed(idx)
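    # the generator is seeded with the sample index, so each sample is reproducible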
    verts, faces, colors, normals = get_mesh(rng)
    data = renderer.PyRenderInput(verts=verts.copy(), colors=colors.copy(),
                                  normals=normals.copy(), faces=faces.copy())
    print(f'loading mesh for sample {idx + 1}/{n_samples} took {time.time() - tic}[s]')
    # let the camera point to the center
    center = np.array([0, 0, 3], dtype=np.float32)
    basevec = np.array([-baseline, 0, 0], dtype=np.float32)
    unit = np.array([0, 0, 1], dtype=np.float32)
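    # nominal camera position; each frame of the track perturbs it below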
    cam_x_ = rng.uniform(-0.2, 0.2)
    cam_y_ = rng.uniform(-0.2, 0.2)
    cam_z_ = rng.uniform(-0.2, 0.2)

    ret = collections.defaultdict(list)
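    # per-sample blend factor between the projected pattern and the ambient image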
    blend_im_rnd = np.clip(blend_im + rng.uniform(-0.1, 0.1), 0, 1)
    # capture the same static scene from different view points as a track
    for ind in range(track_length):
        cam_x = cam_x_ + rng.uniform(-0.1, 0.1)
        cam_y = cam_y_ + rng.uniform(-0.1, 0.1)
        cam_z = cam_z_ + rng.uniform(-0.1, 0.1)

        tcam = np.array([cam_x, cam_y, cam_z], dtype=np.float32)
        if np.linalg.norm(tcam[0:2]) < 1e-9:
            Rcam = np.eye(3, dtype=np.float32)
        else:
            Rcam = get_rotation_matrix(center, center - tcam)
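        # the projector is offset from the camera by the stereo baseline
        # and shares its orientation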
        tproj = tcam + basevec
        Rproj = Rcam

        ret['R'].append(Rcam)
        ret['t'].append(tcam)
        cams = []
        projs = []

        # render the scene at multiple scales
        scales = [1, 0.5, 0.25, 0.125]
        for scale in scales:
            fx = K[0, 0] * scale
            fy = K[1, 1] * scale
            px = K[0, 2] * scale
            py = K[1, 2] * scale
            im_height = imsize[0] * scale
            im_width = imsize[1] * scale
            cams.append(renderer.PyCamera(fx, fy, px, py, Rcam, tcam, im_width, im_height))
            projs.append(renderer.PyCamera(fx, fy, px, py, Rproj, tproj, im_width, im_height))
        for s, cam, proj, pattern in zip(itertools.count(), cams, projs, patterns):
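            # focal length at scale s; used below to convert depth to disparity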
            fl = K[0, 0] / (2 ** s)

            shader = renderer.PyShader(0.5, 1.5, 0.0, 10)
            pyrenderer = renderer.PyRenderer(cam, shader, engine='gpu')
            pyrenderer.mesh_proj(data, proj, pattern, d_alpha=0, d_beta=0.35)
            # get the reflected laser pattern $R$
            im = pyrenderer.color().copy()
            depth = pyrenderer.depth().copy()
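            # standard stereo relation: disparity = baseline * focal_length / depth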
            disp = baseline * fl / depth
            mask = depth > 0
            im = np.mean(im, axis=2)
            # get the ambient image $A$
            ambient = pyrenderer.normal().copy()
            ambient = np.mean(ambient, axis=2)
            # get the noise-free IR image $J$
            im = blend_im_rnd * im + (1 - blend_im_rnd) * ambient
            ret[f'ambient{s}'].append(ambient[None].astype(np.float32))
            # get the gradient magnitude of the ambient image $|\nabla A|$
            ambient = ambient.astype(np.float32)
            sobelx = cv2.Sobel(ambient, cv2.CV_32F, 1, 0, ksize=5)
            sobely = cv2.Sobel(ambient, cv2.CV_32F, 0, 1, ksize=5)
            grad = np.sqrt(sobelx ** 2 + sobely ** 2)
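            # suppress small gradient responses; the 0.8 offset is a tuned parameter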
            grad = np.maximum(grad - 0.8, 0.0)  # parameter
            # get the local contrast normalized gradient LCN($|\nabla A|$)
            grad_lcn, grad_std = lcn.normalize(grad, 5, 0.1)
            grad_lcn = np.clip(grad_lcn, 0.0, 1.0)  # parameter
            ret[f'grad{s}'].append(grad_lcn[None].astype(np.float32))
            ret[f'im{s}'].append(im[None].astype(np.float32))
            ret[f'mask{s}'].append(mask[None].astype(np.float32))
            ret[f'disp{s}'].append(disp[None].astype(np.float32))
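    # stack each per-frame list along a new leading track axis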
    for key in ret.keys():
        ret[key] = np.stack(ret[key], axis=0)
    # save to files
    out_dir = out_root / f'{idx:08d}'
    out_dir.mkdir(exist_ok=True, parents=True)
    for k, val in ret.items():
        for tidx in range(track_length):
            v = val[tidx]
            out_path = out_dir / f'{k}_{tidx}.npy'
            np.save(out_path, v)
    np.save(str(out_dir / 'blend_im.npy'), blend_im_rnd)
    print(f'create sample {idx + 1}/{n_samples} took {time.time() - tic}[s]')


def load_camera_parameters():
    with open('calibration_result.xml') as f:
        cam_mat = xmltodict.parse(f.read())
    # the values are stored as str(float) (e.g. '123.'), so cast via float to get int
    imsize = [int(float(x)) for x in cam_mat['opencv_storage']['img_shape']['data'].split()]
    K_shape = (int(cam_mat['opencv_storage']['cam_int']['rows']),
               int(cam_mat['opencv_storage']['cam_int']['cols']))
    K = np.array(cam_mat['opencv_storage']['cam_int']['data'].split(), dtype=float).reshape(K_shape).T
    return imsize, K


if __name__ == '__main__':
    np.random.seed(42)

    # output directory
    with open('../config.json') as fp:
        config = json.load(fp)
    data_root = Path(config['DATA_ROOT'])
    shapenet_root = config['SHAPENET_ROOT']
    data_type = 'syn'
    out_root = data_root / f'{data_type}'
    out_root.mkdir(parents=True, exist_ok=True)

    # start index: a numeric argument starts at that sample; --resume picks up
    # from the last sample directory that already exists
    start = 0
    if len(sys.argv) >= 2:
        if sys.argv[1].isdigit():
            start = int(sys.argv[1])
        elif sys.argv[1] == '--resume':
            try:
                # redo the last sample in case it was only partially written
                start = max(0, max(int(d) for d in os.listdir(out_root) if d.isdigit()) - 1)
            except ValueError:
                pass
    # load shapenet models
    obj_classes = ['chair']
    objs = get_objs(shapenet_root, obj_classes)

    # camera parameters
    imsize, K = load_camera_parameters()
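    # image sizes and focal lengths for the four pyramid scales (1, 1/2, 1/4, 1/8)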
    imsizes = [(imsize[0] // (2 ** s), imsize[1] // (2 ** s)) for s in range(4)]
    focal_lengths = [K[0, 0] / (2 ** s) for s in range(4)]
    baseline = 0.075
    blend_im = 0.6
    noise = 0

    # capture the same static scene from different view points as a track
    track_length = 4

    # load pattern image
    # FIXME which one????
    pattern_path = './kinect_pattern.png'
    pattern_crop = True
    patterns = get_patterns(pattern_path, imsizes, pattern_crop)
    # write settings to file
    settings = {
        'imsizes': imsizes,
        'patterns': patterns,
        'focal_lengths': focal_lengths,
        'baseline': baseline,
        'K': K,
    }
    out_path = out_root / 'settings.pkl'
    print(f'write settings to {out_path}')
    with open(str(out_path), 'wb') as f:
        pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)
    # start the job
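    # 2**10 + 2**13 = 9216 samples in total; they are created serially here,
    # even though multiprocessing is imported above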
    n_samples = 2 ** 10 + 2 ** 13
    for idx in range(start, n_samples):
        args = (out_root, idx, n_samples, imsize, patterns, K, baseline,
                blend_im, noise, track_length)
        create_data(*args)