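"""Generate synthetic structured-light training data.

Renders random scenes (a large background plane plus ShapeNet objects) under
a projected dot pattern from several nearby viewpoints, and writes per-sample
IR images, ambient images, gradient maps, masks, and disparity maps as .npy
files.
"""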
import numpy as np
import itertools
import pickle
from pathlib import Path
import time
import json
import cv2
import os
import collections
import sys

import xmltodict

sys.path.append('../')
import renderer
import co
from commons import get_patterns, get_rotation_matrix
from lcn import lcn


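# Collect up to `num_perclass` .obj meshes for each requested ShapeNet class
# and normalize every mesh to roughly fit a unit-sized box.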
def get_objs(shapenet_dir, obj_classes, num_perclass=100):
    shapenet = {'chair': '03001627',
                'airplane': '02691156',
                'car': '02958343',
                'watercraft': '04530566'}

    obj_paths = []
    for cls in obj_classes:
        if cls not in shapenet:
            raise ValueError(f'unknown class name: {cls}')
        ids = shapenet[cls]
        obj_path = sorted(Path(f'{shapenet_dir}/{ids}').glob('**/models/*.obj'))
        obj_paths += obj_path[:num_perclass]
    print(f'found {len(obj_paths)} object paths')

    objs = []
    for obj_path in obj_paths:
        print(f'load {obj_path}')
        v, f, _, n = co.io3d.read_obj(obj_path)

        # scale the mesh so its largest extent is 2 and shift it into [-1, 1]
        diffs = v.max(axis=0) - v.min(axis=0)
        v /= (0.5 * diffs.max())
        v -= (v.min(axis=0) + 1)
        f = f.astype(np.int32)
        objs.append((v, f, n))
    print(f'loaded {len(objs)} objects')

    return objs


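# Assemble one random scene: a large background plane plus four randomly
# scaled, rotated, and translated ShapeNet objects drawn from the
# module-level `objs` list, each with a random uniform color.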
def get_mesh(rng, min_z=0):
    # set up the background board
    verts, faces, normals, colors = [], [], [], []
    v, f, n = co.geometry.xyplane(z=0, interleaved=True)
    v[:, 2] += -v[:, 2].min() + rng.uniform(2, 7)
    v[:, :2] *= 5e2
    v[:, 2] = np.mean(v[:, 2]) + (v[:, 2] - np.mean(v[:, 2])) * 5e2
    c = np.empty_like(v)
    c[:] = rng.uniform(0, 1, size=(3,)).astype(np.float32)
    verts.append(v)
    faces.append(f)
    normals.append(n)
    colors.append(c)

    # randomly sample 4 foreground objects for the scene
    for shape_idx in range(4):
        v, f, n = objs[rng.randint(0, len(objs))]
        v, f, n = v.copy(), f.copy(), n.copy()

        # random scale, orientation, and position in front of the board
        s = rng.uniform(0.25, 1)
        v *= s
        R = co.geometry.rotm_from_quat(co.geometry.quat_random(rng=rng))
        v = v @ R.T
        n = n @ R.T
        v[:, 2] += -v[:, 2].min() + min_z + rng.uniform(0.5, 3)
        v[:, :2] += rng.uniform(-1, 1, size=(1, 2))

        c = np.empty_like(v)
        c[:] = rng.uniform(0, 1, size=(3,)).astype(np.float32)

        verts.append(v.astype(np.float32))
        faces.append(f)
        normals.append(n)
        colors.append(c)

    verts, faces = co.geometry.stack_mesh(verts, faces)
    normals = np.vstack(normals).astype(np.float32)
    colors = np.vstack(colors).astype(np.float32)
    return verts, faces, colors, normals


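# Render one sample: the same random static scene is captured from
# `track_length` nearby viewpoints; each view is rendered at four scales
# under the projected dot pattern. Per view this writes the camera pose and,
# per scale, the IR image, ambient image, LCN'd ambient gradient, mask, and
# disparity as .npy files.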
def create_data(out_root, idx, n_samples, imsize, patterns, K, baseline, blend_im, noise, track_length=4):
    tic = time.time()
    rng = np.random.RandomState(idx)

    verts, faces, colors, normals = get_mesh(rng)
    data = renderer.PyRenderInput(verts=verts.copy(), colors=colors.copy(), normals=normals.copy(), faces=faces.copy())
    print(f'loading mesh for sample {idx + 1}/{n_samples} took {time.time() - tic}[s]')

    # let the camera point to the center
    center = np.array([0, 0, 3], dtype=np.float32)

    basevec = np.array([-baseline, 0, 0], dtype=np.float32)
    unit = np.array([0, 0, 1], dtype=np.float32)

    cam_x_ = rng.uniform(-0.2, 0.2)
    cam_y_ = rng.uniform(-0.2, 0.2)
    cam_z_ = rng.uniform(-0.2, 0.2)

    ret = collections.defaultdict(list)
    blend_im_rnd = np.clip(blend_im + rng.uniform(-0.1, 0.1), 0, 1)

    # capture the same static scene from different viewpoints as a track
    for ind in range(track_length):

        # jitter the camera position around the track center
        cam_x = cam_x_ + rng.uniform(-0.1, 0.1)
        cam_y = cam_y_ + rng.uniform(-0.1, 0.1)
        cam_z = cam_z_ + rng.uniform(-0.1, 0.1)

        tcam = np.array([cam_x, cam_y, cam_z], dtype=np.float32)

        if np.linalg.norm(tcam[0:2]) < 1e-9:
            Rcam = np.eye(3, dtype=np.float32)
        else:
            Rcam = get_rotation_matrix(center, center - tcam)

        # the projector sits one baseline to the side of the camera
        tproj = tcam + basevec
        Rproj = Rcam

        ret['R'].append(Rcam)
        ret['t'].append(tcam)

        cams = []
        projs = []

        # render the scene at multiple scales
        scales = [1, 0.5, 0.25, 0.125]
        for scale in scales:
            fx = K[0, 0] * scale
            fy = K[1, 1] * scale
            px = K[0, 2] * scale
            py = K[1, 2] * scale
            im_height = imsize[0] * scale // 1
            im_width = imsize[1] * scale // 1
            cams.append(renderer.PyCamera(fx, fy, px, py, Rcam, tcam, im_width, im_height))
            projs.append(renderer.PyCamera(fx, fy, px, py, Rproj, tproj, im_width, im_height))

        for s, cam, proj, pattern in zip(itertools.count(), cams, projs, patterns):
            fl = K[0, 0] / (2 ** s)

            shader = renderer.PyShader(0.5, 1.5, 0.0, 10)
            pyrenderer = renderer.PyRenderer(cam, shader, engine='gpu')
            pyrenderer.mesh_proj(data, proj, pattern, d_alpha=0, d_beta=0.35)

            # get the reflected laser pattern $R$
            im = pyrenderer.color().copy()
            depth = pyrenderer.depth().copy()
            # disparity d = f * b / z for a rectified stereo pair
            disp = baseline * fl / depth
            mask = depth > 0
            im = np.mean(im, axis=2)

            # get the ambient image $A$
            ambient = pyrenderer.normal().copy()
            ambient = np.mean(ambient, axis=2)

            # get the noise-free IR image $J$
            im = blend_im_rnd * im + (1 - blend_im_rnd) * ambient
            ret[f'ambient{s}'].append(ambient[None].astype(np.float32))

            # get the gradient magnitude of the ambient image $|\nabla A|$
            ambient = ambient.astype(np.float32)
            sobelx = cv2.Sobel(ambient, cv2.CV_32F, 1, 0, ksize=5)
            sobely = cv2.Sobel(ambient, cv2.CV_32F, 0, 1, ksize=5)
            grad = np.sqrt(sobelx ** 2 + sobely ** 2)
            grad = np.maximum(grad - 0.8, 0.0)  # parameter

            # get the local contrast normalized gradient LCN($|\nabla A|$)
            grad_lcn, grad_std = lcn.normalize(grad, 5, 0.1)
            grad_lcn = np.clip(grad_lcn, 0.0, 1.0)  # parameter
            ret[f'grad{s}'].append(grad_lcn[None].astype(np.float32))

            ret[f'im{s}'].append(im[None].astype(np.float32))
            ret[f'mask{s}'].append(mask[None].astype(np.float32))
            ret[f'disp{s}'].append(disp[None].astype(np.float32))

    for key in ret.keys():
        ret[key] = np.stack(ret[key], axis=0)

    # save to files
    out_dir = out_root / f'{idx:08d}'
    out_dir.mkdir(exist_ok=True, parents=True)
    for k, val in ret.items():
        for tidx in range(track_length):
            v = val[tidx]
            out_path = out_dir / f'{k}_{tidx}.npy'
            np.save(out_path, v)
    np.save(str(out_dir / 'blend_im.npy'), blend_im_rnd)

    print(f'create sample {idx + 1}/{n_samples} took {time.time() - tic}[s]')


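# Parse the image size and camera intrinsics K from an OpenCV XML
# calibration file via xmltodict.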
def load_camera_parameters():
    with open('calibration_result.xml') as f:
        cam_mat = xmltodict.parse(f.read())
    # the values are stored as float strings (e.g. '123.'), but we need ints
    imsize = [int(float(x)) for x in cam_mat['opencv_storage']['img_shape']['data'].split()]
    K_shape = int(cam_mat['opencv_storage']['cam_int']['rows']), int(cam_mat['opencv_storage']['cam_int']['cols'])
    K = np.array(cam_mat['opencv_storage']['cam_int']['data'].split(), dtype=float).reshape(K_shape).T
    return imsize, K


if __name__ == '__main__':

    np.random.seed(42)

    # output directory
    with open('../config.json') as fp:
        config = json.load(fp)
    data_root = Path(config['DATA_ROOT'])
    shapenet_root = config['SHAPENET_ROOT']
    settings_imsize = tuple(map(int, config['IMSIZE'].split(',')))

    data_type = 'syn'
    out_root = data_root / data_type
    out_root.mkdir(parents=True, exist_ok=True)

    # optionally start at a given sample index, or resume from the last
    # sample directory that was written (stepping back one sample in case
    # it was only partially written)
    start = 0
    if len(sys.argv) >= 2:
        if sys.argv[1] == '--resume':
            sample_dirs = [int(d) for d in os.listdir(out_root) if d.isdigit()]
            if sample_dirs:
                start = max(0, max(sample_dirs) - 1)
        elif sys.argv[1].isdigit():
            start = int(sys.argv[1])

    # load shapenet models
    obj_classes = ['chair']
    objs = get_objs(shapenet_root, obj_classes)

    # camera parameters
    imsize, K = load_camera_parameters()
    # the image size from the settings takes precedence over the calibration
    imsize = settings_imsize

    imsizes = [(imsize[0] // (2 ** s), imsize[1] // (2 ** s)) for s in range(4)]

    focal_lengths = [K[0, 0] / (2 ** s) for s in range(4)]
    baseline = 0.075
    blend_im = 0.6
    noise = 0

    # capture the same static scene from different viewpoints as a track
    track_length = 4

    # load pattern image
    # FIXME which one????
    pattern_path = './kinect_pattern.png'
    pattern_crop = True
    patterns = get_patterns(pattern_path, imsizes, pattern_crop)

    # write settings to file
    settings = {
        'imsizes': imsizes,
        'patterns': patterns,
        'focal_lengths': focal_lengths,
        'baseline': baseline,
        'K': K,
    }
    out_path = out_root / 'settings.pkl'
    print(f'write settings to {out_path}')
    with open(str(out_path), 'wb') as f:
        pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)

    # start the job
    n_samples = 2 ** 10 + 2 ** 13
    for idx in range(start, n_samples):
        args = (out_root, idx, n_samples, imsize, patterns, K, baseline, blend_im, noise, track_length)
        create_data(*args)
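    # The samples are independent (each create_data call seeds its own RNG
    # with `idx`), so the serial loop above could in principle be fanned out
    # across processes. A minimal sketch, assuming the GPU renderer tolerates
    # one context per worker process (not verified here):
    #
    #   import multiprocessing
    #   jobs = [(out_root, idx, n_samples, imsize, patterns, K, baseline,
    #            blend_im, noise, track_length)
    #           for idx in range(start, n_samples)]
    #   with multiprocessing.Pool(processes=4) as pool:
    #       pool.starmap(create_data, jobs)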