forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
exitsnoop.py
executable file
·277 lines (245 loc) · 10.1 KB
/
exitsnoop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
from __future__ import print_function
import argparse
import ctypes as ct
import os
import platform
import re
import signal
import sys
from bcc import BPF
from datetime import datetime
from time import strftime
#
# exitsnoop Trace all process termination (exit, fatal signal)
# For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: exitsnoop [-h] [-x] [-t] [--utc] [--label[=LABEL]] [-p PID]
#
# _examples is the help epilog: _getParser() hands it to
# argparse.ArgumentParser as `epilog` together with
# RawDescriptionHelpFormatter, so its line breaks are kept in --help output.
_examples = """examples:
exitsnoop # trace all process termination
exitsnoop -x # trace only fails, exclude exit(0)
exitsnoop -t # include timestamps (local time)
exitsnoop --utc # include timestamps (UTC)
exitsnoop -p 181 # only trace PID 181
exitsnoop --label=exit # label each output line with 'exit'
"""
"""
Exit status (from <include/sysexits.h>):
0 EX_OK Success
2 argparse error
70 EX_SOFTWARE syntax error detected by compiler, or
verifier error from kernel
77 EX_NOPERM Need sudo (CAP_SYS_ADMIN) for BPF() system call
The template for this script was Brendan Gregg's execsnoop
https://github.com/iovisor/bcc/blob/master/tools/execsnoop.py
More information about this script is in bcc/tools/exitsnoop_example.txt
Copyright 2016 Netflix, Inc.
Copyright 2019 Instana, Inc.
Licensed under the Apache License, Version 2.0 (the "License")
07-Feb-2016 Brendan Gregg (Netflix) Created execsnoop
04-May-2019 Arturo Martin-de-Nicolas (Instana) Created exitsnoop
13-May-2019 Jeroen Soeters (Instana) Refactor to import as module
"""
def _getParser():
    """Build the command-line parser and return its bound ``parse_args``.

    The ArgumentParser itself is not returned; callers receive the bound
    ``parser.parse_args`` method and call it with a list of argument strings.

    Returns:
        A callable taking an argument list and returning an
        ``argparse.Namespace`` with the attributes ``timestamp``, ``utc``,
        ``pid``, ``label``, ``failed``, ``ebpf`` and ``timespec``.
    """
    parser = argparse.ArgumentParser(
        description="Trace all process termination (exit, fatal signal)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_examples)
    a = parser.add_argument
    a("-t", "--timestamp", action="store_true",
      help="include timestamp (local time default)")
    a("--utc", action="store_true",
      help="include timestamp in UTC (-t implied)")
    # The PID is substituted textually into the generated C filter
    # expression, so let argparse reject anything that is not an integer
    # instead of letting it surface later as a BPF compile failure.
    a("-p", "--pid", type=int, help="trace this PID only")
    a("--label", help="label each line")
    a("-x", "--failed", action="store_true",
      help="trace only fails, exclude exit(0)")
    # print the embedded C program and exit, for debugging
    a("--ebpf", action="store_true", help=argparse.SUPPRESS)
    # RHEL 7.6 keeps task->start_time as struct timespec, convert to u64 nanoseconds
    a("--timespec", action="store_true", help=argparse.SUPPRESS)
    return parser.parse_args
class Global():
    """Module-wide state shared by the functions in this file.

    Attributes (documented here rather than inline):
      parse_args        - bound parse_args callable returned by _getParser()
      args              - parsed argparse namespace; None until initialize()
      argv              - argument list given to initialize(); None until then
      SIGNUM_TO_SIGNAME - signal number -> name without the "SIG" prefix
    """
    parse_args = _getParser()
    args = None
    argv = None
    # Built from the names exported by the signal module.  The pattern allows
    # digits so that SIGUSR1 / SIGUSR2 are included (a letters-only pattern
    # skips them and they would be reported as "unknown"); names containing
    # an underscore such as SIG_DFL / SIG_IGN still do not match.
    SIGNUM_TO_SIGNAME = {
        num: re.sub(r"^SIG", "", name)
        for name, num in signal.__dict__.items()
        if re.match(r"^SIG[A-Z0-9]+$", name)
    }
class Data(ct.Structure):
    """ctypes view of one exit event; mirrors struct data_t in _embedded_c().

    The layout is packed (_pack_ = 1) to match the packed attribute on the
    C struct, and the field order below must stay in step with it.
    """
    _TASK_COMM_LEN = 16  # linux/sched.h
    _pack_ = 1
    _fields_ = [
        # task->start_time, see --timespec arg
        ("start_time", ct.c_ulonglong),
        # bpf_ktime_get_ns()
        ("exit_time", ct.c_ulonglong),
        # task->tgid, thread group id == sys_getpid()
        ("pid", ct.c_uint),
        # task->pid, thread id == sys_gettid()
        ("tid", ct.c_uint),
        # task->parent->tgid, notified of exit
        ("ppid", ct.c_uint),
        ("exit_code", ct.c_int),
        ("sig_info", ct.c_uint),
        ("task", ct.c_char * _TASK_COMM_LEN),
    ]
def _embedded_c(args):
    """Generate C program for sched_process_exit tracepoint in kernel/exit.c."""
    # The docstring above is runtime text: _ebpf_comment() copies
    # _embedded_c.__doc__ into the generated header comment, so it is kept
    # verbatim.
    #
    # The `args` parameter is not read here; every option comes from
    # Global.args.  The upper-case words in the template (EBPF_COMMENT,
    # FILTER_PID, ...) are placeholders that are textually replaced, in the
    # order listed at the bottom, before the program is returned.
    program = """
EBPF_COMMENT
#include <linux/sched.h>
BPF_STATIC_ASSERT_DEF
struct data_t {
u64 start_time;
u64 exit_time;
u32 pid;
u32 tid;
u32 ppid;
int exit_code;
u32 sig_info;
char task[TASK_COMM_LEN];
} __attribute__((packed));
BPF_STATIC_ASSERT(sizeof(struct data_t) == CTYPES_SIZEOF_DATA);
BPF_PERF_OUTPUT(events);
TRACEPOINT_PROBE(sched, sched_process_exit)
{
struct task_struct *task = (typeof(task))bpf_get_current_task();
if (FILTER_PID || FILTER_EXIT_CODE) { return 0; }
struct data_t data = {
.start_time = PROCESS_START_TIME_NS,
.exit_time = bpf_ktime_get_ns(),
.pid = task->tgid,
.tid = task->pid,
.ppid = task->parent->tgid,
.exit_code = task->exit_code >> 8,
.sig_info = task->exit_code & 0xFF,
};
bpf_get_current_comm(&data.task, sizeof(data.task));
events.perf_submit(args, &data, sizeof(data));
return 0;
}
"""
    # TODO: this macro belongs in bcc/src/cc/export/helpers.h
    static_assert_macro = r"""
#ifndef BPF_STATIC_ASSERT
#define BPF_STATIC_ASSERT(condition) __attribute__((unused)) \
extern int bpf_static_assert[(condition) ? 1 : -1]
#endif
"""
    # Header comment is only generated when the program is being dumped.
    if Global.args.ebpf:
        header = _ebpf_comment()
    else:
        header = ''

    struct_size = str(ct.sizeof(Data))

    # A filter of '0' never matches, i.e. nothing is filtered out.
    if Global.args.pid:
        pid_filter = "task->tgid != %s" % Global.args.pid
    else:
        pid_filter = '0'

    if Global.args.failed:
        exit_code_filter = 'task->exit_code == 0'
    else:
        exit_code_filter = '0'

    if Global.args.timespec:
        start_time_ns = ('(task->start_time.tv_sec * 1000000000L)'
                         ' + task->start_time.tv_nsec')
    else:
        start_time_ns = 'task->start_time'

    replacements = (
        ('EBPF_COMMENT', header),
        ("BPF_STATIC_ASSERT_DEF", static_assert_macro),
        ("CTYPES_SIZEOF_DATA", struct_size),
        ('FILTER_PID', pid_filter),
        ('FILTER_EXIT_CODE', exit_code_filter),
        ('PROCESS_START_TIME_NS', start_time_ns),
    )
    for placeholder, value in replacements:
        program = program.replace(placeholder, value)
    return program
def _ebpf_comment():
    """Build the C block comment placed at the top of the dumped program.

    The comment records the script name (sys.argv[0]), the current local
    time, the docstring of _embedded_c(), the argument list stored in
    Global.argv and the parsed options in Global.args.
    """
    stamp = strftime("%Y-%m-%d %H:%M:%S %Z")
    comment = 'Created by %s at %s:\n\t%s' % (
        sys.argv[0], stamp, _embedded_c.__doc__)

    # Spread the str() of the options dict over several lines.
    args = str(vars(Global.args))
    for old, new in (('{', '{\n\t'), (', ', ',\n\t'), ('}', ',\n }\n\n')):
        args = args.replace(old, new)

    body = "\n %s\n\n ARGV = %s\n\n ARGS = %s/" % (
        comment, ' '.join(Global.argv), args)
    # Prefix every line with "*" (tabs become single spaces); the final
    # line therefore ends in "*/", which closes the comment.
    body = body.replace('\n', '\n\t*')
    body = body.replace('\t', ' ')
    return "\n /*" + body
def _print_header():
    """Print the column-title line that precedes the event lines.

    A TIME-<zone> column is added when Global.args.timestamp is set and a
    LABEL column when Global.args.label is not None.
    """
    opts = Global.args
    columns = []
    if opts.timestamp:
        zone = 'UTC' if opts.utc else strftime("%Z")
        columns.append("%-13s" % ('TIME-' + zone))
    if opts.label is not None:
        columns.append("%-6s" % "LABEL")
    columns.append("%-16s %-6s %-6s %-6s %-7s %-10s" %
                   ("PCOMM", "PID", "PPID", "TID", "AGE(s)", "EXIT_CODE"))
    print("".join(columns))
def _print_event(cpu, data, size):  # callback
    """Print one exit event as a single output line.

    Perf-buffer callback (passed to snoop() by main()).

    Args:
        cpu:  unused here.
        data: pointer to the raw event; reinterpreted as a Data structure.
        size: unused here.

    Output columns: optional timestamp (Global.args.timestamp), optional
    label (Global.args.label), command name, pid, ppid, tid, age in seconds
    and finally the exit code or the terminating signal.
    """
    e = ct.cast(data, ct.POINTER(Data)).contents
    if Global.args.timestamp:
        now = datetime.utcnow() if Global.args.utc else datetime.now()
        # "%f" is microseconds; dropping three digits leaves milliseconds.
        print("%-13s" % (now.strftime("%H:%M:%S.%f")[:-3]), end="")
    if Global.args.label is not None:
        # An empty --label value falls back to the word 'exit'.
        label = Global.args.label if len(Global.args.label) else 'exit'
        print("%-6s" % label, end="")
    age = (e.exit_time - e.start_time) / 1e9
    # `task` is a raw char array filled in by the kernel side; nothing here
    # guarantees it is valid text, so undecodable bytes are replaced rather
    # than raising UnicodeDecodeError in the middle of the callback.
    comm = e.task.decode('utf-8', 'replace')
    print("%-16s %-6d %-6d %-6d %-7.2f " %
          (comm, e.pid, e.ppid, e.tid, age), end="")
    if e.sig_info == 0:
        print("0" if e.exit_code == 0 else "code %d" % e.exit_code)
    else:
        # Low 7 bits carry the signal number, bit 0x80 the core-dump flag.
        sig = e.sig_info & 0x7F
        if sig:
            print("signal %d (%s)" % (sig, signum_to_signame(sig)), end="")
        if e.sig_info & 0x80:
            print(", core dumped ", end="")
        print()
# =============================
# Module: These functions are available for import
# =============================
def initialize(arg_list = sys.argv[1:]):
    """Trace all process termination.

    arg_list - list of args, if omitted then uses command line args
               arg_list is passed to argparse.ArgumentParser.parse_args()

    For example, if arg_list = [ '-x', '-t' ]
       args.failed == True
       args.timestamp == True

    Side effects: stores arg_list in Global.argv and the parsed options in
    Global.args; --utc switches on args.timestamp, and args.timespec is
    switched on for kernel releases matching 3.10.*el7* or when the first
    BPF() attempt fails (see the note in the body).

    Returns a tuple (return_code, result)
       0 = Ok, result is the return value from BPF()
       1 = args.ebpf is requested, result is the generated C code
       os.EX_NOPERM: need CAP_SYS_ADMIN, result is error message
       os.EX_SOFTWARE: internal software error, result is error message
    """
    Global.argv = arg_list
    Global.args = Global.parse_args(arg_list)
    if Global.args.utc and not Global.args.timestamp:
        Global.args.timestamp = True
    # Dumping the program (--ebpf) needs no privileges; loading it does.
    if not Global.args.ebpf and os.geteuid() != 0:
        return (os.EX_NOPERM, "Need sudo (CAP_SYS_ADMIN) for BPF() system call")
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    if re.match(r'^3\.10\..*el7.*$', platform.release()):  # Centos/Red Hat
        Global.args.timespec = True
    # At most two attempts: as generated, then once more with --timespec.
    for _ in range(2):
        c = _embedded_c(Global.args)
        if Global.args.ebpf:
            return (1, c)
        try:
            return (os.EX_OK, BPF(text=c))
        except Exception as e:
            error = format(e)
            # NOTE(review): str.find() returns -1 (truthy) when the text is
            # absent and 0 (falsy) when it is at the very start, so these two
            # tests do not mean "the message mentions ...".  As written,
            # almost any first failure triggers the --timespec retry.  Left
            # unchanged on purpose: tightening this to `in` would disable the
            # retry if the exception text omits the compiler diagnostics --
            # confirm what BPF() puts in the message before changing it.
            if (not Global.args.timespec
                    and error.find('struct timespec')
                    and error.find('start_time')):
                print('This kernel keeps task->start_time in a struct timespec.\n' +
                      'Retrying with --timespec')
                Global.args.timespec = True
                continue
            return (os.EX_SOFTWARE, "BPF error: " + error)
        except:
            # Anything that is not an Exception subclass is also reported
            # as a return code instead of propagating.
            return (os.EX_SOFTWARE, "Unexpected error: {0}".format(sys.exc_info()[0]))
def snoop(bpf, event_handler):
    """Deliver process termination events to event_handler.

    bpf - result returned by successful initialize()
    event_handler - callback opened on the "events" perf buffer

    Polls the perf buffer repeatedly.  When Global.args.pid is set the
    function returns after the first poll completes; otherwise it keeps
    polling and only ends through an exception (e.g. KeyboardInterrupt).
    """
    events = bpf["events"]
    events.open_perf_buffer(event_handler)
    bpf.perf_buffer_poll()
    while not Global.args.pid:
        bpf.perf_buffer_poll()
def signum_to_signame(signum):
    """Look up signum in Global.SIGNUM_TO_SIGNAME; "unknown" if not listed."""
    try:
        return Global.SIGNUM_TO_SIGNAME[signum]
    except KeyError:
        return "unknown"
# =============================
# Script: invoked as a script
# =============================
def main():
    """Script entry point: initialize, print the header, then trace.

    When initialize() returns a non-zero code its result (generated C for
    --ebpf, otherwise an error message) is printed and the process exits:
    with status 0 for --ebpf, else with that code.  Ctrl-C ends the trace
    with an empty line and a zero exit status.
    """
    try:
        status, result = initialize()
        if status:
            print(result)
            # A non-zero status with --ebpf is the requested dump, not an error.
            if Global.args.ebpf:
                sys.exit(0)
            sys.exit(status)
        _print_header()
        snoop(result, _print_event)
    except KeyboardInterrupt:
        print()
        sys.exit()
    return 0


if __name__ == '__main__':
    main()