yolov5/detect.py at master · ultralytics/yolov5

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

# Ultralytics YOLOv5 🚀, AGPL-3.0 license

"""

Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.

Usage - sources:

$ python detect.py --weights yolov5s.pt --source 0 # webcam

img.jpg # image

vid.mp4 # video

screen # screenshot

path/ # directory

list.txt # list of images

list.streams # list of streams

'path/*.jpg' # glob

'https://youtu.be/LNwODJXcvt4' # YouTube

'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream

Usage - formats:

$ python detect.py --weights yolov5s.pt # PyTorch

yolov5s.torchscript # TorchScript

yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn

yolov5s_openvino_model # OpenVINO

yolov5s.engine # TensorRT

yolov5s.mlpackage # CoreML (macOS-only)

yolov5s_saved_model # TensorFlow SavedModel

yolov5s.pb # TensorFlow GraphDef

yolov5s.tflite # TensorFlow Lite

yolov5s_edgetpu.tflite # TensorFlow Edge TPU

yolov5s_paddle_model # PaddlePaddle

"""

import argparse

import csv

import os

import platform

import sys

from pathlib import Path

import torch

FILE = Path(__file__).resolve()

ROOT = FILE.parents[0] # YOLOv5 root directory

if str(ROOT) not in sys.path:

sys.path.append(str(ROOT)) # add ROOT to PATH

ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative

from ultralytics.utils.plotting import Annotator, colors, save_one_box

from models.common import DetectMultiBackend

from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams

from utils.general import (

LOGGER,

Profile,

check_file,

check_img_size,

check_imshow,

check_requirements,

colorstr,

cv2,

increment_path,

non_max_suppression,

print_args,

scale_boxes,

strip_optimizer,

xyxy2xywh,

)

from utils.torch_utils import select_device, smart_inference_mode

@smart_inference_mode()

def run(

weights=ROOT / "yolov5s.pt", # model path or triton URL

source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)

data=ROOT / "data/coco128.yaml", # dataset.yaml path

imgsz=(640, 640), # inference size (height, width)

conf_thres=0.25, # confidence threshold

iou_thres=0.45, # NMS IOU threshold

max_det=1000, # maximum detections per image

device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu

view_img=False, # show results

save_txt=False, # save results to *.txt

save_format=0, # save boxes coordinates in YOLO format or Pascal-VOC format (0 for YOLO and 1 for Pascal-VOC)

save_csv=False, # save results in CSV format

save_conf=False, # save confidences in --save-txt labels

save_crop=False, # save cropped prediction boxes

nosave=False, # do not save images/videos

classes=None, # filter by class: --class 0, or --class 0 2 3

agnostic_nms=False, # class-agnostic NMS

augment=False, # augmented inference

visualize=False, # visualize features

update=False, # update all models

project=ROOT / "runs/detect", # save results to project/name

name="exp", # save results to project/name

exist_ok=False, # existing project/name ok, do not increment

line_thickness=3, # bounding box thickness (pixels)

hide_labels=False, # hide labels

hide_conf=False, # hide confidences

half=False, # use FP16 half-precision inference

dnn=False, # use OpenCV DNN for ONNX inference

vid_stride=1, # video frame-rate stride

):

"""

Runs YOLOv5 detection inference on various sources like images, videos, directories, streams, etc.

Args:

weights (str | Path): Path to the model weights file or a Triton URL. Default is 'yolov5s.pt'.

source (str | Path): Input source, which can be a file, directory, URL, glob pattern, screen capture, or webcam

index. Default is 'data/images'.

data (str | Path): Path to the dataset YAML file. Default is 'data/coco128.yaml'.

imgsz (tuple[int, int]): Inference image size as a tuple (height, width). Default is (640, 640).

conf_thres (float): Confidence threshold for detections. Default is 0.25.

iou_thres (float): Intersection Over Union (IOU) threshold for non-max suppression. Default is 0.45.

max_det (int): Maximum number of detections per image. Default is 1000.

device (str): CUDA device identifier (e.g., '0' or '0,1,2,3') or 'cpu'. Default is an empty string, which uses the

best available device.

view_img (bool): If True, display inference results using OpenCV. Default is False.

save_txt (bool): If True, save results in a text file. Default is False.

save_csv (bool): If True, save results in a CSV file. Default is False.

save_conf (bool): If True, include confidence scores in the saved results. Default is False.

save_crop (bool): If True, save cropped prediction boxes. Default is False.

nosave (bool): If True, do not save inference images or videos. Default is False.

classes (list[int]): List of class indices to filter detections by. Default is None.

agnostic_nms (bool): If True, perform class-agnostic non-max suppression. Default is False.

augment (bool): If True, use augmented inference. Default is False.

visualize (bool): If True, visualize feature maps. Default is False.

update (bool): If True, update all models' weights. Default is False.

project (str | Path): Directory to save results. Default is 'runs/detect'.

name (str): Name of the current experiment; used to create a subdirectory within 'project'. Default is 'exp'.

exist_ok (bool): If True, existing directories with the same name are reused instead of being incremented. Default is

False.

line_thickness (int): Thickness of bounding box lines in pixels. Default is 3.

hide_labels (bool): If True, do not display labels on bounding boxes. Default is False.

hide_conf (bool): If True, do not display confidence scores on bounding boxes. Default is False.

half (bool): If True, use FP16 half-precision inference. Default is False.

dnn (bool): If True, use OpenCV DNN backend for ONNX inference. Default is False.

vid_stride (int): Stride for processing video frames, to skip frames between processing. Default is 1.

Returns:

None

Examples:

```python

from ultralytics import run

# Run inference on an image

run(source='data/images/example.jpg', weights='yolov5s.pt', device='0')

# Run inference on a video with specific confidence threshold

run(source='data/videos/example.mp4', weights='yolov5s.pt', conf_thres=0.4, device='0')

```

"""

source = str(source)

save_img = not nosave and not source.endswith(".txt") # save inference images

is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)

is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))

webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)

screenshot = source.lower().startswith("screen")

if is_url and is_file:

source = check_file(source) # download

# Directories

save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run

(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir

# Load model

device = select_device(device)

model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

stride, names, pt = model.stride, model.names, model.pt

imgsz = check_img_size(imgsz, s=stride) # check image size

# Dataloader

bs = 1 # batch_size

if webcam:

view_img = check_imshow(warn=True)

dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)

bs = len(dataset)

elif screenshot:

dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)

else:

dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)

vid_path, vid_writer = [None] * bs, [None] * bs

# Run inference

model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup

seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))

for path, im, im0s, vid_cap, s in dataset:

with dt[0]:

im = torch.from_numpy(im).to(model.device)

im = im.half() if model.fp16 else im.float() # uint8 to fp16/32

im /= 255 # 0 - 255 to 0.0 - 1.0

if len(im.shape) == 3:

im = im[None] # expand for batch dim

if model.xml and im.shape[0] > 1:

ims = torch.chunk(im, im.shape[0], 0)

# Inference

with dt[1]:

visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False

if model.xml and im.shape[0] > 1:

pred = None

for image in ims:

if pred is None:

pred = model(image, augment=augment, visualize=visualize).unsqueeze(0)

else:

pred = torch.cat((pred, model(image, augment=augment, visualize=visualize).unsqueeze(0)), dim=0)

pred = [pred, None]

else:

pred = model(im, augment=augment, visualize=visualize)

# NMS

with dt[2]:

pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

# Second-stage classifier (optional)

# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

# Define the path for the CSV file

csv_path = save_dir / "predictions.csv"

# Create or append to the CSV file

def write_to_csv(image_name, prediction, confidence):

"""Writes prediction data for an image to a CSV file, appending if the file exists."""

data = {"Image Name": image_name, "Prediction": prediction, "Confidence": confidence}

with open(csv_path, mode="a", newline="") as f:

writer = csv.DictWriter(f, fieldnames=data.keys())

if not csv_path.is_file():

writer.writeheader()

writer.writerow(data)

# Process predictions

for i, det in enumerate(pred): # per image

seen += 1

if webcam: # batch_size >= 1

p, im0, frame = path[i], im0s[i].copy(), dataset.count

s += f"{i}: "

else:

p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)

p = Path(p) # to Path

save_path = str(save_dir / p.name) # im.jpg

txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt

s += "{:g}x{:g} ".format(*im.shape[2:]) # print string

gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh

imc = im0.copy() if save_crop else im0 # for save_crop

annotator = Annotator(im0, line_width=line_thickness, example=str(names))

if len(det):

# Rescale boxes from img_size to im0 size

det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

# Print results

for c in det[:, 5].unique():

n = (det[:, 5] == c).sum() # detections per class

s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string

# Write results

for *xyxy, conf, cls in reversed(det):

c = int(cls) # integer class

label = names[c] if hide_conf else f"{names[c]}"

confidence = float(conf)

confidence_str = f"{confidence:.2f}"

if save_csv:

write_to_csv(p.name, label, confidence_str)

if save_txt: # Write to file

if save_format == 0:

coords = (

(xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()

) # normalized xywh

else:

coords = (torch.tensor(xyxy).view(1, 4) / gn).view(-1).tolist() # xyxy

line = (cls, *coords, conf) if save_conf else (cls, *coords) # label format

with open(f"{txt_path}.txt", "a") as f:

f.write(("%g " * len(line)).rstrip() % line + "\n")

if save_img or save_crop or view_img: # Add bbox to image

c = int(cls) # integer class

label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")

annotator.box_label(xyxy, label, color=colors(c, True))

if save_crop:

save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True)

# Stream results

im0 = annotator.result()

if view_img:

if platform.system() == "Linux" and p not in windows:

windows.append(p)

cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)

cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])

cv2.imshow(str(p), im0)

cv2.waitKey(1) # 1 millisecond

# Save results (image with detections)

if save_img:

if dataset.mode == "image":

cv2.imwrite(save_path, im0)

else: # 'video' or 'stream'

if vid_path[i] != save_path: # new video

vid_path[i] = save_path

if isinstance(vid_writer[i], cv2.VideoWriter):

vid_writer[i].release() # release previous video writer

if vid_cap: # video

fps = vid_cap.get(cv2.CAP_PROP_FPS)

w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))

h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

else: # stream

fps, w, h = 30, im0.shape[1], im0.shape[0]

save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos

vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

vid_writer[i].write(im0)

# Print time (inference-only)

LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

# Print results

t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image

LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)

if save_txt or save_img:

s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""

LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")

if update:

strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)

def parse_opt():

"""

Parse command-line arguments for YOLOv5 detection, allowing custom inference options and model configurations.

Args:

--weights (str | list[str], optional): Model path or Triton URL. Defaults to ROOT / 'yolov5s.pt'.

--source (str, optional): File/dir/URL/glob/screen/0(webcam). Defaults to ROOT / 'data/images'.

--data (str, optional): Dataset YAML path. Provides dataset configuration information.

--imgsz (list[int], optional): Inference size (height, width). Defaults to [640].

--conf-thres (float, optional): Confidence threshold. Defaults to 0.25.

--iou-thres (float, optional): NMS IoU threshold. Defaults to 0.45.

--max-det (int, optional): Maximum number of detections per image. Defaults to 1000.

--device (str, optional): CUDA device, i.e., '0' or '0,1,2,3' or 'cpu'. Defaults to "".

--view-img (bool, optional): Flag to display results. Defaults to False.

--save-txt (bool, optional): Flag to save results to *.txt files. Defaults to False.

--save-csv (bool, optional): Flag to save results in CSV format. Defaults to False.

--save-conf (bool, optional): Flag to save confidences in labels saved via --save-txt. Defaults to False.

--save-crop (bool, optional): Flag to save cropped prediction boxes. Defaults to False.

--nosave (bool, optional): Flag to prevent saving images/videos. Defaults to False.

--classes (list[int], optional): List of classes to filter results by, e.g., '--classes 0 2 3'. Defaults to None.

--agnostic-nms (bool, optional): Flag for class-agnostic NMS. Defaults to False.

--augment (bool, optional): Flag for augmented inference. Defaults to False.

--visualize (bool, optional): Flag for visualizing features. Defaults to False.

--update (bool, optional): Flag to update all models in the model directory. Defaults to False.

--project (str, optional): Directory to save results. Defaults to ROOT / 'runs/detect'.

--name (str, optional): Sub-directory name for saving results within --project. Defaults to 'exp'.

--exist-ok (bool, optional): Flag to allow overwriting if the project/name already exists. Defaults to False.

--line-thickness (int, optional): Thickness (in pixels) of bounding boxes. Defaults to 3.

--hide-labels (bool, optional): Flag to hide labels in the output. Defaults to False.

--hide-conf (bool, optional): Flag to hide confidences in the output. Defaults to False.

--half (bool, optional): Flag to use FP16 half-precision inference. Defaults to False.

--dnn (bool, optional): Flag to use OpenCV DNN for ONNX inference. Defaults to False.

--vid-stride (int, optional): Video frame-rate stride, determining the number of frames to skip in between

consecutive frames. Defaults to 1.

Returns:

argparse.Namespace: Parsed command-line arguments as an argparse.Namespace object.

Example:

```python

from ultralytics import YOLOv5

args = YOLOv5.parse_opt()

```

"""

parser = argparse.ArgumentParser()

parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL")

parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")

parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")

parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")

parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")

parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")

parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")

parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")

parser.add_argument("--view-img", action="store_true", help="show results")

parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")

parser.add_argument(

"--save-format",

type=int,

default=0,

help="whether to save boxes coordinates in YOLO format or Pascal-VOC format when save-txt is True, 0 for YOLO and 1 for Pascal-VOC",

)

parser.add_argument("--save-csv", action="store_true", help="save results in CSV format")

parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")

parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")

parser.add_argument("--nosave", action="store_true", help="do not save images/videos")

parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")

parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")

parser.add_argument("--augment", action="store_true", help="augmented inference")

parser.add_argument("--visualize", action="store_true", help="visualize features")

parser.add_argument("--update", action="store_true", help="update all models")

parser.add_argument("--project", default=ROOT / "runs/detect", help="save results to project/name")

parser.add_argument("--name", default="exp", help="save results to project/name")

parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")

parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")

parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")

parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")

parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")

parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")

parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")

opt = parser.parse_args()

opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand

print_args(vars(opt))

return opt

def main(opt):

"""

Executes YOLOv5 model inference based on provided command-line arguments, validating dependencies before running.

Args:

opt (argparse.Namespace): Command-line arguments for YOLOv5 detection. See function `parse_opt` for details.

Returns:

None

Note:

This function performs essential pre-execution checks and initiates the YOLOv5 detection process based on user-specified

options. Refer to the usage guide and examples for more information about different sources and formats at:

https://github.com/ultralytics/ultralytics

Example usage:

```python

if __name__ == "__main__":

opt = parse_opt()

main(opt)

```

"""

check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))

run(**vars(opt))

if __name__ == "__main__":

opt = parse_opt()

main(opt)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

detect.py

detect.py

Files

detect.py

Latest commit

History

detect.py

File metadata and controls