Metric3D/hubconf.py at main · tiandaji/Metric3D

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

dependencies = ['torch', 'torchvision']

import os

import torch

# Prefer mmcv's Config/DictAction; fall back to mmengine when that import fails.
try:
    from mmcv.utils import Config, DictAction
except ImportError:
    # Only import failures trigger the fallback, so KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by a bare `except:`.
    from mmengine import Config, DictAction

from mono.model.monodepth_model import get_configured_monodepth_model

# Directory that contains this hubconf.py; the config paths below are built from it.
metric3d_dir = os.path.dirname(__file__)

# Registry of the models exposed by this file. Each entry maps a model name to
# the local config file ('cfg_file') and the checkpoint URL ('ckpt_file').
MODEL_TYPE = {
    'ConvNeXt-Tiny': dict(
        cfg_file=f'{metric3d_dir}/mono/configs/HourglassDecoder/convtiny.0.3_150.py',
        ckpt_file='https://huggingface.co/JUGGHM/Metric3D/resolve/main/convtiny_hourglass_v1.pth',
    ),
    'ConvNeXt-Large': dict(
        cfg_file=f'{metric3d_dir}/mono/configs/HourglassDecoder/convlarge.0.3_150.py',
        ckpt_file='https://huggingface.co/JUGGHM/Metric3D/resolve/main/convlarge_hourglass_0.3_150_step750k_v1.1.pth',
    ),
    'ViT-Small': dict(
        cfg_file=f'{metric3d_dir}/mono/configs/HourglassDecoder/vit.raft5.small.py',
        ckpt_file='https://huggingface.co/JUGGHM/Metric3D/resolve/main/metric_depth_vit_small_800k.pth',
    ),
    'ViT-Large': dict(
        cfg_file=f'{metric3d_dir}/mono/configs/HourglassDecoder/vit.raft5.large.py',
        ckpt_file='https://huggingface.co/JUGGHM/Metric3D/resolve/main/metric_depth_vit_large_800k.pth',
    ),
    'ViT-giant2': dict(
        cfg_file=f'{metric3d_dir}/mono/configs/HourglassDecoder/vit.raft5.giant2.py',
        ckpt_file='https://huggingface.co/JUGGHM/Metric3D/resolve/main/metric_depth_vit_giant2_800k.pth',
    ),
}

def metric3d_convnext_tiny(pretrain=False, **kwargs):
    '''
    Return a Metric3D model with ConvNeXt-Tiny backbone and Hourglass-Decoder head.
    For usage examples, refer to: https://github.com/YvanYin/Metric3D/blob/main/hubconf.py

    Args:
      pretrain (bool): whether to load pretrained weights.
      **kwargs: accepted for call compatibility; not used by this function.

    Returns:
      model (nn.Module): a Metric3D model.
    '''
    cfg_file = MODEL_TYPE['ConvNeXt-Tiny']['cfg_file']
    ckpt_file = MODEL_TYPE['ConvNeXt-Tiny']['ckpt_file']

    cfg = Config.fromfile(cfg_file)
    model = get_configured_monodepth_model(cfg)
    if pretrain:
        # Map the checkpoint onto the CPU so it can be deserialized on machines
        # without CUDA; load_state_dict copies the values into the model's own
        # parameters, so the caller still chooses the final device.
        checkpoint = torch.hub.load_state_dict_from_url(ckpt_file, map_location='cpu')
        # strict=False: checkpoint keys that do not match the model are tolerated.
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    return model

def metric3d_convnext_large(pretrain=False, **kwargs):
    '''
    Return a Metric3D model with ConvNeXt-Large backbone and Hourglass-Decoder head.
    For usage examples, refer to: https://github.com/YvanYin/Metric3D/blob/main/hubconf.py

    Args:
      pretrain (bool): whether to load pretrained weights.
      **kwargs: accepted for call compatibility; not used by this function.

    Returns:
      model (nn.Module): a Metric3D model.
    '''
    cfg_file = MODEL_TYPE['ConvNeXt-Large']['cfg_file']
    ckpt_file = MODEL_TYPE['ConvNeXt-Large']['ckpt_file']

    cfg = Config.fromfile(cfg_file)
    model = get_configured_monodepth_model(cfg)
    if pretrain:
        # Map the checkpoint onto the CPU so it can be deserialized on machines
        # without CUDA; load_state_dict copies the values into the model's own
        # parameters, so the caller still chooses the final device.
        checkpoint = torch.hub.load_state_dict_from_url(ckpt_file, map_location='cpu')
        # strict=False: checkpoint keys that do not match the model are tolerated.
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    return model

def metric3d_vit_small(pretrain=False, **kwargs):
    '''
    Return a Metric3D model with ViT-Small backbone and RAFT-4iter head.
    For usage examples, refer to: https://github.com/YvanYin/Metric3D/blob/main/hubconf.py

    Args:
      pretrain (bool): whether to load pretrained weights.
      **kwargs: accepted for call compatibility; not used by this function.

    Returns:
      model (nn.Module): a Metric3D model.
    '''
    cfg_file = MODEL_TYPE['ViT-Small']['cfg_file']
    ckpt_file = MODEL_TYPE['ViT-Small']['ckpt_file']

    cfg = Config.fromfile(cfg_file)
    model = get_configured_monodepth_model(cfg)
    if pretrain:
        # Map the checkpoint onto the CPU so it can be deserialized on machines
        # without CUDA; load_state_dict copies the values into the model's own
        # parameters, so the caller still chooses the final device.
        checkpoint = torch.hub.load_state_dict_from_url(ckpt_file, map_location='cpu')
        # strict=False: checkpoint keys that do not match the model are tolerated.
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    return model

def metric3d_vit_large(pretrain=False, **kwargs):
    '''
    Return a Metric3D model with ViT-Large backbone and RAFT-8iter head.
    For usage examples, refer to: https://github.com/YvanYin/Metric3D/blob/main/hubconf.py

    Args:
      pretrain (bool): whether to load pretrained weights.
      **kwargs: accepted for call compatibility; not used by this function.

    Returns:
      model (nn.Module): a Metric3D model.
    '''
    cfg_file = MODEL_TYPE['ViT-Large']['cfg_file']
    ckpt_file = MODEL_TYPE['ViT-Large']['ckpt_file']

    cfg = Config.fromfile(cfg_file)
    model = get_configured_monodepth_model(cfg)
    if pretrain:
        # Map the checkpoint onto the CPU so it can be deserialized on machines
        # without CUDA; load_state_dict copies the values into the model's own
        # parameters, so the caller still chooses the final device.
        checkpoint = torch.hub.load_state_dict_from_url(ckpt_file, map_location='cpu')
        # strict=False: checkpoint keys that do not match the model are tolerated.
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    return model

def metric3d_vit_giant2(pretrain=False, **kwargs):
    '''
    Return a Metric3D model with ViT-Giant2 backbone and RAFT-8iter head.
    For usage examples, refer to: https://github.com/YvanYin/Metric3D/blob/main/hubconf.py

    Args:
      pretrain (bool): whether to load pretrained weights.
      **kwargs: accepted for call compatibility; not used by this function.

    Returns:
      model (nn.Module): a Metric3D model.
    '''
    cfg_file = MODEL_TYPE['ViT-giant2']['cfg_file']
    ckpt_file = MODEL_TYPE['ViT-giant2']['ckpt_file']

    cfg = Config.fromfile(cfg_file)
    model = get_configured_monodepth_model(cfg)
    if pretrain:
        # Map the checkpoint onto the CPU so it can be deserialized on machines
        # without CUDA; load_state_dict copies the values into the model's own
        # parameters, so the caller still chooses the final device.
        checkpoint = torch.hub.load_state_dict_from_url(ckpt_file, map_location='cpu')
        # strict=False: checkpoint keys that do not match the model are tolerated.
        model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    return model

if __name__ == '__main__':
    # Demo script: load one image, run a hub model on it, turn the prediction
    # into metric depth, compare it with a ground-truth depth map and write a
    # visualization of the predicted normals to 'normal_vis.png'.
    # Tensors and the model are moved with .cuda(), so a CUDA device is required.
    import cv2
    import numpy as np

    #### prepare data
    rgb_file = 'data/kitti_demo/rgb/0000000050.png'
    depth_file = 'data/kitti_demo/depth/0000000050.png'
    # presumably [fx, fy, cx, cy] in pixels; only intrinsic[0] is read later -- TODO confirm
    intrinsic = [707.0493, 707.0493, 604.0814, 180.5066]
    # divisor applied to the raw ground-truth depth image values further down
    gt_depth_scale = 256.0
    # reverse the channel axis of the loaded image (OpenCV loads BGR, so this yields RGB)
    rgb_origin = cv2.imread(rgb_file)[:, :, ::-1]

    #### adjust input size to fit pretrained model
    # keep ratio resize
    input_size = (616, 1064) # for vit model
    # input_size = (544, 1216) # for convnext model
    h, w = rgb_origin.shape[:2]
    # largest scale at which both height and width still fit inside input_size
    scale = min(input_size[0] / h, input_size[1] / w)
    rgb = cv2.resize(rgb_origin, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR)
    # remember to scale the intrinsics by the same factor; depth values are left unchanged
    intrinsic = [intrinsic[0] * scale, intrinsic[1] * scale, intrinsic[2] * scale, intrinsic[3] * scale]
    # padding to input_size
    # the border colour uses the same values as `mean` below, so padded pixels
    # become zero after normalization
    padding = [123.675, 116.28, 103.53]
    h, w = rgb.shape[:2]
    pad_h = input_size[0] - h
    pad_w = input_size[1] - w
    # split the padding between both sides; the odd pixel goes to bottom/right
    pad_h_half = pad_h // 2
    pad_w_half = pad_w // 2
    rgb = cv2.copyMakeBorder(rgb, pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half, cv2.BORDER_CONSTANT, value=padding)
    # [top, bottom, left, right] border sizes, kept to crop the predictions later
    pad_info = [pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]

    #### normalize
    # per-channel mean/std shaped (3, 1, 1) so they broadcast over a (3, H, W) image
    mean = torch.tensor([123.675, 116.28, 103.53]).float()[:, None, None]
    std = torch.tensor([58.395, 57.12, 57.375]).float()[:, None, None]
    # HWC numpy array -> CHW float tensor
    rgb = torch.from_numpy(rgb.transpose((2, 0, 1))).float()
    rgb = torch.div((rgb - mean), std)
    # add a leading batch dimension and move to the GPU
    rgb = rgb[None, :, :, :].cuda()

    ###################### canonical camera space ######################
    # inference
    # the model is fetched through torch.hub from the 'yvanyin/metric3d' repo,
    # not built by calling the functions defined in this file directly
    model = torch.hub.load('yvanyin/metric3d', 'metric3d_vit_small', pretrain=True)
    model.cuda().eval()
    with torch.no_grad():
        pred_depth, confidence, output_dict = model.inference({'input': rgb})

    # un pad
    pred_depth = pred_depth.squeeze()
    pred_depth = pred_depth[pad_info[0] : pred_depth.shape[0] - pad_info[1], pad_info[2] : pred_depth.shape[1] - pad_info[3]]

    # upsample to original size
    pred_depth = torch.nn.functional.interpolate(pred_depth[None, None, :, :], rgb_origin.shape[:2], mode='bilinear').squeeze()
    ###################### canonical camera space ######################

    #### de-canonical transform
    canonical_to_real_scale = intrinsic[0] / 1000.0 # 1000.0 is the focal length of canonical camera
    pred_depth = pred_depth * canonical_to_real_scale # now the depth is metric
    # limit the prediction to the range [0, 300] (presumably metres -- TODO confirm)
    pred_depth = torch.clamp(pred_depth, 0, 300)

    #### you can now do anything with the metric depth
    # such as evaluate predicted depth
    if depth_file is not None:
        # flag -1 reads the file unchanged (keeps the stored bit depth)
        gt_depth = cv2.imread(depth_file, -1)
        gt_depth = gt_depth / gt_depth_scale
        gt_depth = torch.from_numpy(gt_depth).float().cuda()
        assert gt_depth.shape == pred_depth.shape

        # evaluate only pixels whose ground-truth depth is greater than 1e-8
        mask = (gt_depth > 1e-8)
        # mean absolute relative error over the masked pixels
        abs_rel_err = (torch.abs(pred_depth[mask] - gt_depth[mask]) / gt_depth[mask]).mean()
        print('abs_rel_err:', abs_rel_err.item())

    #### normal are also available
    if 'prediction_normal' in output_dict: # only available for Metric3Dv2, i.e. vit model
        # channels 0-2 are taken as the normal vector, channel 3 as its confidence
        pred_normal = output_dict['prediction_normal'][:, :3, :, :]
        # NOTE: normal_confidence is extracted here but not used in the rest of this script
        normal_confidence = output_dict['prediction_normal'][:, 3, :, :] # see https://arxiv.org/abs/2109.09881 for details
        # un pad and resize to some size if needed
        pred_normal = pred_normal.squeeze()
        pred_normal = pred_normal[:, pad_info[0] : pred_normal.shape[1] - pad_info[1], pad_info[2] : pred_normal.shape[2] - pad_info[3]]
        # you can now do anything with the normal
        # such as visualize pred_normal
        # CHW tensor -> HWC numpy array for OpenCV
        pred_normal_vis = pred_normal.cpu().numpy().transpose((1, 2, 0))
        # shift and halve the values (maps [-1, 1] to [0, 1]) before scaling to 8-bit
        pred_normal_vis = (pred_normal_vis + 1) / 2
        cv2.imwrite('normal_vis.png', (pred_normal_vis * 255).astype(np.uint8))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

hubconf.py

hubconf.py

Files

hubconf.py

Latest commit

History

hubconf.py

File metadata and controls