bcc/tools/profile.py at master · kennyyu/bcc

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

#!/usr/bin/python

# @lint-avoid-python-3-compatibility-imports

#

# profile Profile CPU usage by sampling stack traces at a timed interval.

# For Linux, uses BCC, BPF, perf_events. Embedded C.

#

# This is an efficient profiler, as stack traces are frequency counted in

# kernel context, rather than passing every stack to user space for frequency

# counting there. Only the unique stacks and counts are passed to user space

# at the end of the profile, greatly reducing the kernel<->user transfer.

#

# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is

# a version of this tool that may work on Linux 4.6 - 4.8.

#

# Licensed under the Apache License, Version 2.0 (the "License")

#

# THANKS: Alexei Starovoitov, who added proper BPF profiling support to Linux;

# Sasha Goldshtein, Andrew Birchall, and Evgeny Vereshchagin, who wrote much

# of the code here, borrowed from tracepoint.py and offcputime.py; and

# Teng Qin, who added perf support in bcc.

#

# 15-Jul-2016 Brendan Gregg Created this.

# 20-Oct-2016 " " Switched to use the new 4.9 support.

from __future__ import print_function

from bcc import BPF, PerfType, PerfSWConfig

from sys import stderr

from time import sleep

import argparse

import signal

import os

import errno

import multiprocessing

import ctypes as ct

#

# Process Arguments

#

# arg validation

def positive_int(val):
    """argparse ``type=`` callable: parse ``val`` as a non-negative integer.

    Zero is accepted here; positive_nonzero_int() adds the nonzero check.

    Returns the parsed int.
    Raises argparse.ArgumentTypeError if ``val`` is not an integer or is
    negative.
    """
    try:
        ival = int(val)
    except ValueError:
        raise argparse.ArgumentTypeError("must be an integer")

    if ival < 0:
        raise argparse.ArgumentTypeError("must be positive")
    return ival

def positive_nonzero_int(val):
    """argparse ``type=`` callable: parse ``val`` as an integer greater than 0.

    Delegates parsing and the negative check to positive_int(), then
    rejects zero with argparse.ArgumentTypeError.
    """
    ival = positive_int(val)
    if ival != 0:
        return ival
    raise argparse.ArgumentTypeError("must be nonzero")

def stack_id_err(stack_id):
    """Return True if ``stack_id`` is a get_stackid failure worth reporting.

    Negative ids are errors, except -EFAULT: that normally just means the
    stack trace is not available (for example asking for a kernel stack
    while running user-space code), so it is not treated as an error.
    """
    return (stack_id < 0) and (stack_id != -errno.EFAULT)

# arguments
examples = """examples:
./profile # profile stack traces at 49 Hertz until Ctrl-C
./profile -F 99 # profile stack traces at 99 Hertz
./profile -c 1000000 # profile stack traces every 1 in a million events
./profile 5 # profile at 49 Hertz for 5 seconds only
./profile -f 5 # output in folded format for flame graphs
./profile -p 185 # only profile threads for PID 185
./profile -U # only show user space stacks (no kernel)
./profile -K # only show kernel space stacks (no user)
"""
parser = argparse.ArgumentParser(
    description="Profile CPU stack traces at a timed interval",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

thread_group = parser.add_mutually_exclusive_group()
thread_group.add_argument("-p", "--pid", type=positive_int,
    help="profile this PID only")
# TODO: add options for user/kernel threads only

# -U and -K are mutually exclusive: at most one stack side can be dropped.
stack_group = parser.add_mutually_exclusive_group()
stack_group.add_argument("-U", "--user-stacks-only", action="store_true",
    help="show stacks from user space only (no kernel space stacks)")
stack_group.add_argument("-K", "--kernel-stacks-only", action="store_true",
    help="show stacks from kernel space only (no user space stacks)")

# -F and -c are mutually exclusive ways to choose the sampling rate.
# Both must be nonzero: the sample-rate logic further down tests them for
# truthiness, so a value of 0 used to be accepted and then silently
# replaced by the 49 Hertz default.
sample_group = parser.add_mutually_exclusive_group()
sample_group.add_argument("-F", "--frequency", type=positive_nonzero_int,
    help="sample frequency, Hertz")
sample_group.add_argument("-c", "--count", type=positive_nonzero_int,
    help="sample period, number of events")

parser.add_argument("-d", "--delimited", action="store_true",
    help="insert delimiter between kernel/user stacks")
parser.add_argument("-a", "--annotations", action="store_true",
    help="add _[k] annotations to kernel frames")
parser.add_argument("-f", "--folded", action="store_true",
    help="output folded format, one line per stack (for flame graphs)")
parser.add_argument("--stack-storage-size", default=16384,
    type=positive_nonzero_int,
    help="the number of unique stack traces that can be stored and "
         "displayed (default %(default)s)")
# The default duration doubles as the "run until Ctrl-C" sentinel that the
# header code compares against.
parser.add_argument("duration", nargs="?", default=99999999,
    type=positive_nonzero_int,
    help="duration of trace, in seconds")
parser.add_argument("-C", "--cpu", type=int, default=-1,
    help="cpu number to run profile on")
# Hidden option: print the generated BPF program and exit.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)

# option logic
args = parser.parse_args()
pid = int(args.pid) if args.pid is not None else -1
duration = int(args.duration)
debug = 0
# A delimiter only makes sense when both stack kinds are being shown.
need_delimiter = args.delimited and not (args.kernel_stacks_only or
                                         args.user_stacks_only)

# TODO: add stack depth, and interval

#

# Setup BPF

#

# define BPF program
#
# THREAD_FILTER, STACK_STORAGE_SIZE, USER_STACK_GET and KERNEL_STACK_GET are
# placeholders that are substituted with str.replace() before compilation.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/bpf_perf_event.h>
#include <linux/sched.h>

struct key_t {
    u32 pid;
    u64 kernel_ip;
    u64 kernel_ret_ip;
    int user_stack_id;
    int kernel_stack_id;
    char name[TASK_COMM_LEN];
};
BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);

// This code gets a bit complex. Probably not suitable for casual hacking.

int do_perf_event(struct bpf_perf_event_data *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    if (!(THREAD_FILTER))
        return 0;

    // create map key
    struct key_t key = {.pid = pid};
    bpf_get_current_comm(&key.name, sizeof(key.name));

    // get stacks
    key.user_stack_id = USER_STACK_GET;
    key.kernel_stack_id = KERNEL_STACK_GET;

    if (key.kernel_stack_id >= 0) {
        // populate extras to fix the kernel stack
        u64 ip = PT_REGS_IP(&ctx->regs);
        u64 page_offset;

        // if ip isn't sane, leave key ips as zero for later checking
#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
        // x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
        page_offset = __PAGE_OFFSET_BASE;
#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
        // x64, 4.17, and later
#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
        page_offset = __PAGE_OFFSET_BASE_L5;
#else
        page_offset = __PAGE_OFFSET_BASE_L4;
#endif
#else
        // earlier x86_64 kernels, e.g., 4.6, comes here
        // arm64, s390, powerpc, x86_32
        page_offset = PAGE_OFFSET;
#endif

        if (ip > page_offset) {
            key.kernel_ip = ip;
        }
    }

    // count the sample whether or not a kernel stack was captured
    counts.increment(key);
    return 0;
}
"""

# set thread filter
# thread_context is the human-readable description used in the header;
# thread_filter is the C expression substituted for THREAD_FILTER.
if args.pid is None:
    perf_filter = "-a"
    thread_context = "all threads"
    thread_filter = '1'
else:
    perf_filter = '-p %s' % args.pid
    thread_context = "PID %s" % args.pid
    thread_filter = 'pid == %s' % args.pid
bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter)

# set stack storage size
bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))

# handle stack args
# Start from "collect both stacks" and replace whichever side the user
# excluded with the constant -1, so that side's stack id is always negative.
user_stack_get = "stack_traces.get_stackid(&ctx->regs, BPF_F_USER_STACK)"
kernel_stack_get = "stack_traces.get_stackid(&ctx->regs, 0)"
stack_context = "user + kernel"
if args.user_stacks_only:
    kernel_stack_get = "-1"
    stack_context = "user"
elif args.kernel_stacks_only:
    user_stack_get = "-1"
    stack_context = "kernel"
bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get)
bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get)

# Exactly one of sample_freq / sample_period ends up nonzero.
if args.frequency:
    sample_freq, sample_period = args.frequency, 0
elif args.count:
    sample_freq, sample_period = 0, args.count
else:
    # If user didn't specify anything, use default 49Hz sampling
    sample_freq, sample_period = 49, 0

# Description of the sampling rate for the header line.
if sample_freq:
    sample_context = "%s%d %s" % ("", sample_freq, "Hertz")
else:
    sample_context = "%s%d %s" % ("every ", sample_period, "events")

# header
# Folded output is meant for flame graph tooling, so no header is printed.
if not args.folded:
    header = "Sampling at %s of %s by %s stack" % (
        sample_context, thread_context, stack_context)
    if args.cpu >= 0:
        header += " on CPU#{}".format(args.cpu)
    # 99999999 is the default duration, i.e. no duration was given.
    if duration < 99999999:
        header += " for %d secs." % duration
    else:
        header += "... Hit Ctrl-C to end."
    print(header)

# Dump the final BPF program text; with --ebpf, stop after printing it.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF & perf_events
# Compile the program and attach do_perf_event to the software CPU clock
# event, using the sample period or frequency selected above.
b = BPF(text=bpf_text)
b.attach_perf_event(
    ev_type=PerfType.SOFTWARE,
    ev_config=PerfSWConfig.CPU_CLOCK,
    fn_name="do_perf_event",
    sample_period=sample_period,
    sample_freq=sample_freq,
    cpu=args.cpu)

# signal handler

def signal_ignore(signal, frame):
    """Signal handler that only prints a newline and takes no other action.

    Both arguments are unused. Note that the ``signal`` parameter shadows
    the ``signal`` module inside this function.
    """
    print()

#

# Output Report

#

# collect samples
# Sleep for the requested duration; Ctrl-C ends the sampling window early.
try:
    sleep(duration)
except KeyboardInterrupt:
    # as cleanup can take some time, trap Ctrl-C:
    # later SIGINTs go to signal_ignore instead of raising
    # KeyboardInterrupt while the report is being produced.
    signal.signal(signal.SIGINT, signal_ignore)

# Blank line between the header and the report (not in folded mode).
if not args.folded:
    print()

def aksym(addr):
    """Resolve kernel address ``addr`` via b.ksym().

    When -a/--annotations was given, the "_[k]" suffix is appended to the
    resolved name so kernel frames can be told apart from user frames.
    """
    sym = b.ksym(addr)
    if not args.annotations:
        return sym
    return sym + "_[k]".encode()

# output stacks
# Walk every (key, count) pair from the BPF "counts" map, least frequent
# first, and print it either folded (one line per stack) or multi-line.
# need_delimiter is computed during option handling.
missing_stacks = 0
has_enomem = False
counts = b.get_table("counts")
stack_traces = b.get_table("stack_traces")
for k, v in sorted(counts.items(), key=lambda kv: kv[1].value):
    # handle get_stackid errors
    if not args.user_stacks_only and stack_id_err(k.kernel_stack_id):
        missing_stacks += 1
        has_enomem = has_enomem or k.kernel_stack_id == -errno.ENOMEM
    if not args.kernel_stacks_only and stack_id_err(k.user_stack_id):
        missing_stacks += 1
        has_enomem = has_enomem or k.user_stack_id == -errno.ENOMEM

    # Materialise both stacks as lists: folded output needs reversed().
    user_stack = [] if k.user_stack_id < 0 else \
        list(stack_traces.walk(k.user_stack_id))
    kernel_stack = [] if k.kernel_stack_id < 0 else \
        list(stack_traces.walk(k.kernel_stack_id))

    # fix kernel stack: the BPF program leaves kernel_ip as zero unless the
    # sampled instruction pointer passed its sanity check; when set, it
    # becomes the innermost kernel frame.
    if k.kernel_stack_id >= 0 and k.kernel_ip:
        kernel_stack.insert(0, k.kernel_ip)

    if args.folded:
        # print folded stack output
        # Every element of `line` must be bytes: it is joined with b";"
        # and decoded once at the end.
        line = [k.name]
        # if we failed to get the stack id, such as due to no space
        # (-ENOMEM) or hash collision (-EEXIST), we still print a
        # placeholder for consistency
        if not args.kernel_stacks_only:
            if stack_id_err(k.user_stack_id):
                line.append(b"[Missed User Stack]")
            else:
                line.extend(b.sym(addr, k.pid)
                            for addr in reversed(user_stack))
        if not args.user_stacks_only:
            if (need_delimiter and k.kernel_stack_id >= 0 and
                    k.user_stack_id >= 0):
                line.append(b"-")
            if stack_id_err(k.kernel_stack_id):
                line.append(b"[Missed Kernel Stack]")
            else:
                line.extend(aksym(addr) for addr in reversed(kernel_stack))
        print("%s %d" % (b";".join(line).decode('utf-8', 'replace'), v.value))
    else:
        # print default multi-line stack output
        if not args.user_stacks_only:
            if stack_id_err(k.kernel_stack_id):
                print(" [Missed Kernel Stack]")
            else:
                for addr in kernel_stack:
                    # decode like the user frames below, so Python 3 does
                    # not print the bytes repr (b'...')
                    print(" %s" % aksym(addr).decode('utf-8', 'replace'))
        if not args.kernel_stacks_only:
            if need_delimiter and k.user_stack_id >= 0 and k.kernel_stack_id >= 0:
                print(" --")
            if stack_id_err(k.user_stack_id):
                print(" [Missed User Stack]")
            else:
                for addr in user_stack:
                    print(" %s" % b.sym(addr, k.pid).decode('utf-8', 'replace'))
        print(" %-16s %s (%d)" % ("-", k.name.decode('utf-8', 'replace'), k.pid))
        print(" %d\n" % v.value)

# check missing
# Report on stderr how many stacks could not be shown; add the storage-size
# hint only when at least one failure was -ENOMEM.
if missing_stacks > 0:
    if has_enomem:
        enomem_str = " Consider increasing --stack-storage-size."
    else:
        enomem_str = ""
    print("WARNING: %d stack traces could not be displayed.%s" %
          (missing_stacks, enomem_str),
          file=stderr)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

profile.py

profile.py

Files

profile.py

Latest commit

History

profile.py

File metadata and controls