Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 139 additions & 20 deletions teuthology/task/internal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,46 +299,138 @@ def check_conflict(ctx, config):
raise RuntimeError('Stale jobs detected, aborting.')


def get_backtraces_from_coredumps(coredump_path: str, dump_path: str, dump_program: str, dump: str) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and in another method, could you please, use typing for arguments and method returns.
Also, the arguments description would be great to have in docstring.

"""
Run gdb in batch mode against one coredump and save the full backtrace
of every thread to <coredump_path>/<dump>.gdb.txt.

On a future iteration, we can expand this to inject gdb commands from the test plan yaml

:param coredump_path: directory that holds the core files; the gdb
    output file ``<dump>.gdb.txt`` is written into this directory
:param dump_path: filesystem path of the core file itself
:param dump_program: path of the executable that dropped the core
:param dump: basename of the core file (used for logging and for
    naming the gdb output file)
:returns: None -- the backtrace is written to disk, not returned

NOTE(review): assumes a ``gdb`` binary is available on PATH on the host
running teuthology -- see the review discussion about this dependency.
"""
gdb_output_path = os.path.join(coredump_path,
dump + '.gdb.txt')
log.info(f'Getting backtrace from core {dump} ...')
# Batch mode with pagination off so gdb runs unattended; stdout and
# stderr both go to the .gdb.txt file.
with open(gdb_output_path, 'w') as gdb_out:
gdb_proc = subprocess.Popen(
['gdb', '--batch', '-ex', 'set pagination 0',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like introduces another dependency for teuthology worker. Also what is gonna happen when teuthology is run in non-linux environment where gdb is not installed, for example, macosx?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kshtsk Precisely, I wanted to ask about the dependencies: where is the correct place to ensure the gdb package for the distro is being installed? How can we make this functionality optional, eg when using a debug build?

'-ex', 'thread apply all bt full',
dump_program, dump_path],
stdout=gdb_out,
stderr=subprocess.STDOUT
)
# Block until gdb finishes writing the backtrace.
gdb_proc.wait()
log.info(f"core {dump} backtrace saved to {gdb_output_path}")


def fetch_binaries_for_coredumps(path, remote):
"""
Pul ELFs (debug and stripped) for each coredump found
Pull ELFs (debug and stripped) for each coredump found

The coredumps might appear compressed, either by gzip or zstd (Centos9)
The following are examples from the output of the 'file' command:
# 1422917770.7450.core: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, \
# from 'radosgw --rgw-socket-path /home/ubuntu/cephtest/apache/tmp.client.0/fastcgi_soc'
# Centos 9:
# core.ceph_test_neora.0.fb62b98.zst: Zstandard compressed data (v0.8+), Dictionary ID: None
# ELF 64-bit LSB core file, x86-64, version 1 (SYSV), SVR4-style, \
# from 'bin/ceph_test_neorados_snapshots --gtest_break_on_failure', real uid: 0, \
# effective uid: 0, real gid: 0, effective gid: 0, execfn: 'bin/ceph_test_neorados_snapshots', platform: 'x86_64'
"""
def _is_core_gziped(dump_path):
with open(dump_path, 'rb') as f:
magic = f.read(2)
if magic == b'\x1f\x8b':
return True
return False

def _is_core_zstded(dump_path):
with open(dump_path, 'rb') as f:
magic = f.read(4)
if magic == b'\x28\xb5\x2f\xfd':
return True
return False

csdict = {
'gzip': {
'check': _is_core_gziped,
'uncompress': [ 'gzip', '-d '],
'regex': r'.*gzip compressed data.*'
},
'zstd': {
'check': _is_core_zstded,
'uncompress': [ 'zstd', '-d '],
'regex': r'.*Zstandard compressed data.*'
}
}

def _get_compressed_type(dump_path):
for ck, cs in csdict.items():
if cs['check'](dump_path):
return ck
return None

def _looks_compressed(dump_out):
for cs in csdict.values():
if re.match(cs['regex'], dump_out):
return True
return False

def _uncompress_file(dump_path, cs_type):
if cs_type is None:
return None
# Construct a bash cmd to uncompress the file based on its type
try:
cmd = csdict[cs_type]['uncompress'] + [dump_path]
log.info(f'Uncompressing via {cmd} ...')
unc_output_path = dump_path.rsplit('.', 1)[0] + '.unc.log'
with open(unc_output_path , 'w') as _out:
unc = subprocess.Popen( cmd, stdout=_out, stderr=subprocess.STDOUT)
unc.wait()
# After uncompressing, the new file path is the original path without the compression suffix
uncompressed_path = dump_path.rsplit('.', 1)[0]
log.info(f'Uncompressed file path: {uncompressed_path}')
return uncompressed_path
except Exception as e:
log.info('Something went wrong while attempting to uncompress the file')
log.error(e)
return None

def _get_file_info(dump_path):
dump_info = subprocess.Popen(['file', dump_path],
stdout=subprocess.PIPE)
dump_out = dump_info.communicate()[0].decode()
return dump_out


# Check for Coredumps:
coredump_path = os.path.join(path, 'coredump')
if os.path.isdir(coredump_path):
log.info('Transferring binaries for coredumps...')
for dump in os.listdir(coredump_path):
# Pull program from core file
# Pull program (that dropped the core) from the core file info
dump_path = os.path.join(coredump_path, dump)
dump_info = subprocess.Popen(['file', dump_path],
stdout=subprocess.PIPE)
dump_out = dump_info.communicate()[0].decode()

# Parse file output to get program, Example output:
# 1422917770.7450.core: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, \
# from 'radosgw --rgw-socket-path /home/ubuntu/cephtest/apache/tmp.client.0/fastcgi_soc'
dump_out = _get_file_info(dump_path)
log.info(f' core looks like: {dump_out}')

if 'gzip' in dump_out:
if _looks_compressed(dump_out):
# if the core is compressed, recognise the type and uncompress it
cs_type = _get_compressed_type(dump_path)
try:
log.info("core is compressed, try accessing gzip file ...")
with gzip.open(dump_path, 'rb') as f_in, \
tempfile.NamedTemporaryFile(mode='w+b') as f_out:
shutil.copyfileobj(f_in, f_out)
dump_info = subprocess.Popen(['file', f_out.name],
stdout=subprocess.PIPE)
dump_out = dump_info.communicate()[0].decode()
log.info(f' core looks like: {dump_out}')
log.info(f"core is compressed, try accessing {cs_type} file ...")
uncompressed_path = _uncompress_file(dump_path, cs_type)
if uncompressed_path is None:
log.info(f"Could not uncompress {dump}, moving on ...")
continue
except Exception as e:
log.info('Something went wrong while opening the compressed file')
log.error(e)
continue
dump_path = uncompressed_path
dump_out = _get_file_info(dump_path)
log.info(f' after uncompressing core looks like: {dump_out}')
try:
dump_command = re.findall("from '([^ ']+)", dump_out)[0]
dump_program = dump_command.split()[0]
log.info(f' dump_program: {dump_program}')
except Exception as e:
log.info("core doesn't have the desired format, moving on ...")
log.info(f"core {dump} doesn't have the desired format, moving on ...")
log.error(e)
continue

Expand All @@ -362,7 +454,23 @@ def fetch_binaries_for_coredumps(path, remote):
debug_path = '{debug_path}.debug'.format(debug_path=debug_path)

remote.get_file(debug_path, coredump_path)

# If debug symbols were found, rename them to match the binary
debug_filename = os.path.basename(debug_path)
local_debug_path = os.path.join(coredump_path, debug_filename)
if os.path.exists(local_debug_path):
new_debug_path = os.path.join(
coredump_path,
dump_program.lstrip(os.path.sep) + '.debug'
)
os.rename(local_debug_path, new_debug_path)

# Execute gdb to get the backtrace and locals
get_backtraces_from_coredumps(coredump_path, dump_path,
dump_program, dump)
# Compress the core file always to save space
with open(dump_path, 'rb') as f_in, \
gzip.open(dump_path + '.gz', 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)

def gzip_if_too_large(compress_min_size, src, tarinfo, local_path):
if tarinfo.size >= compress_min_size:
Expand Down Expand Up @@ -521,6 +629,17 @@ def coredump(ctx, config):
if 'failure_reason' not in ctx.summary:
ctx.summary['failure_reason'] = \
'Found coredumps on {rem}'.format(rem=rem)
# Add the backtraces of the coredumps to the failure reason
coredump_path = os.path.join(archive_dir, 'coredump')
for dump in os.listdir(coredump_path):
if dump.endswith('.gdb.txt'):
with open(os.path.join(coredump_path, dump), 'r') as f:
backtrace = f.read()
ctx.summary['failure_reason'] += \
'\n\nBacktrace from core {dump}:\n{bt}'.format(
dump=dump,
bt=backtrace
)


@contextlib.contextmanager
Expand Down
81 changes: 80 additions & 1 deletion teuthology/task/tests/test_fetch_coredumps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from teuthology.task.internal import fetch_binaries_for_coredumps
from teuthology.task.internal import get_backtraces_from_coredumps
from unittest.mock import patch, Mock
import gzip
import os
Expand All @@ -19,6 +20,36 @@ def __init__(self, ret):
def communicate(self, input=None):
return [TestFetchCoreDumps.MockDecode(self.ret)]

# Minimal stand-in for subprocess.CompletedProcess: exposes only the
# .stdout attribute, returning the canned value passed at construction.
class MockCompletedProcess(object):
def __init__(self, ret):
# ret: canned stdout payload to hand back to the code under test
self.ret = ret

@property
def stdout(self):
return self.ret

# Fake gdb runner: its run() ignores all arguments and wraps the canned
# value in a MockCompletedProcess, mimicking subprocess.run's interface.
class MockGdb(object):
def __init__(self, ret):
# ret: canned gdb output to expose via the returned object's .stdout
self.ret = ret

def run(self, *args, **kwargs):
return TestFetchCoreDumps.MockCompletedProcess(self.ret)

class TestGetBacktracesFromCoreDumps(object):
    """
    Unit test for get_backtraces_from_coredumps.

    The production function shells out to gdb via subprocess.Popen, writes
    the output to <coredump_path>/<dump>.gdb.txt, and returns None.  The
    previous version of this test patched subprocess.run (never called by
    the implementation) and asserted a returned backtrace list (the
    function returns None), so it could never pass.  Verify the gdb
    invocation and the output-file name instead.
    """
    @patch('builtins.open')
    @patch('teuthology.task.internal.subprocess.Popen')
    def test_get_backtraces_from_coredumps(self, m_popen, m_open):
        # The implementation only calls wait() on the Popen handle.
        gdb_proc = Mock()
        gdb_proc.wait.return_value = 0
        m_popen.return_value = gdb_proc

        result = get_backtraces_from_coredumps(
            coredump_path="core_dump_path",
            dump_path="binary_path",
            dump_program="ceph_test_rados_api_io",
            dump="core_dump",
        )

        # The backtrace goes to a file; nothing is returned.
        assert result is None
        # gdb must be driven in batch mode against <program> <core>.
        gdb_cmd = m_popen.call_args[0][0]
        assert gdb_cmd[0] == 'gdb'
        assert '--batch' in gdb_cmd
        assert gdb_cmd[-2:] == ['ceph_test_rados_api_io', 'binary_path']
        gdb_proc.wait.assert_called_once()
        # Output file is <coredump_path>/<dump>.gdb.txt, opened for writing.
        m_open.assert_called_once_with(
            os.path.join('core_dump_path', 'core_dump.gdb.txt'), 'w')

def setup_method(self):
self.the_function = fetch_binaries_for_coredumps
with gzip.open('file.gz', 'wb') as f:
Expand All @@ -44,6 +75,20 @@ def setup_method(self):
" 19:56:56 2022, from Unix, original size modulo 2^32 11"
)

# Centos 9 coredumps are zstd compressed:
self.zstd_compressed_correct = self.MockPopen(
"Zstandard compressed data"\
"'correct.format.core', last modified: Wed Jun 29"\
" 19:55:29 2022, from Unix, original size modulo 2^32 3167080"
)

self.zstd_compressed_incorrect = self.MockPopen(
"Zstandard compressed data"\
"'incorrect.format.core', last modified: Wed Jun 29"\
" 19:56:56 2022, from Unix, original size modulo 2^32 11"
)


# Core is not compressed and file is in the correct format
@patch('teuthology.task.internal.subprocess.Popen')
@patch('teuthology.task.internal.os')
Expand Down Expand Up @@ -112,5 +157,39 @@ def test_compressed_incorrect_format(self, m_os, m_subproc_popen):
self.the_function(None, self.m_remote)
assert self.m_remote.get_file.called == False

# Core is zstd-compressed and file is in the correct format:
# the first mocked 'file' call reports Zstandard data and the second
# (after uncompressing) reports a well-formed core, so the binaries
# are expected to be fetched from the remote.
@patch('teuthology.task.internal.subprocess.Popen')
@patch('teuthology.task.internal.os')
def test_zstd_compressed_correct_format(self, m_os, m_subproc_popen):
# Each subprocess.Popen call consumes the next canned result in order.
m_subproc_popen.side_effect = [
self.zstd_compressed_correct,
self.uncompressed_correct
]
# Pin every os.path helper to the fixture path so the function walks
# a single fake coredump.
m_os.path.join.return_value = self.core_dump_path
m_os.path.sep = self.core_dump_path
m_os.path.isdir.return_value = True
m_os.path.dirname.return_value = self.core_dump_path
m_os.path.exists.return_value = True
m_os.listdir.return_value = [self.core_dump_path]
self.the_function(None, self.m_remote)
# Well-formed core => debug/stripped binaries pulled from the remote.
assert self.m_remote.get_file.called

# Core is zstd-compressed and file is in the wrong format:
# the uncompressed content does not match the expected core format,
# so no binaries should be fetched from the remote.
@patch('teuthology.task.internal.subprocess.Popen')
@patch('teuthology.task.internal.os')
def test_zstd_compressed_incorrect_format(self, m_os, m_subproc_popen):
# Each subprocess.Popen call consumes the next canned result in order.
m_subproc_popen.side_effect = [
self.zstd_compressed_incorrect,
self.uncompressed_incorrect
]
# Pin every os.path helper to the fixture path so the function walks
# a single fake coredump.
m_os.path.join.return_value = self.core_dump_path
m_os.path.sep = self.core_dump_path
m_os.path.isdir.return_value = True
m_os.path.dirname.return_value = self.core_dump_path
m_os.path.exists.return_value = True
m_os.listdir.return_value = [self.core_dump_path]
self.the_function(None, self.m_remote)
# Malformed core => the remote must not be asked for binaries.
assert self.m_remote.get_file.called == False

def teardown(self):
os.remove(self.core_dump_path)
os.remove(self.core_dump_path)
Loading