task: execute gdb on cores to extract backtrace and locals #2111
@@ -299,46 +299,138 @@ def check_conflict(ctx, config):
             raise RuntimeError('Stale jobs detected, aborting.')
 
 
+def get_backtraces_from_coredumps(coredump_path, dump_path, dump_program, dump):
+    """
+    Get backtraces from coredumps found in path.
+    On a future iteration, we can expand this to inject gdb commands from the test plan yaml.
+    """
+    gdb_output_path = os.path.join(coredump_path,
+                                   dump + '.gdb.txt')
+    log.info(f'Getting backtrace from core {dump} ...')
+    with open(gdb_output_path, 'w') as gdb_out:
+        gdb_proc = subprocess.Popen(
+            ['gdb', '--batch', '-ex', 'set pagination 0',
+             '-ex', 'thread apply all bt full',
+             dump_program, dump_path],
+            stdout=gdb_out,
+            stderr=subprocess.STDOUT
+        )
+        gdb_proc.wait()
+    log.info(f"core {dump} backtrace saved to {gdb_output_path}")
+
+
 def fetch_binaries_for_coredumps(path, remote):
     """
-    Pul ELFs (debug and stripped) for each coredump found
+    Pull ELFs (debug and stripped) for each coredump found.
+
+    The coredumps might appear compressed, either by gzip or zstd (CentOS 9).
+    The following are examples from the output of the 'file' command:
+    # 1422917770.7450.core: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, \
+    # from 'radosgw --rgw-socket-path /home/ubuntu/cephtest/apache/tmp.client.0/fastcgi_soc'
+    # CentOS 9:
+    # core.ceph_test_neora.0.fb62b98.zst: Zstandard compressed data (v0.8+), Dictionary ID: None
+    # ELF 64-bit LSB core file, x86-64, version 1 (SYSV), SVR4-style, \
+    # from 'bin/ceph_test_neorados_snapshots --gtest_break_on_failure', real uid: 0, \
+    # effective uid: 0, real gid: 0, effective gid: 0, execfn: 'bin/ceph_test_neorados_snapshots', platform: 'x86_64'
     """
+    def _is_core_gziped(dump_path):
+        with open(dump_path, 'rb') as f:
+            magic = f.read(2)
+        if magic == b'\x1f\x8b':
+            return True
+        return False
+
+    def _is_core_zstded(dump_path):
+        with open(dump_path, 'rb') as f:
+            magic = f.read(4)
+        if magic == b'\x28\xb5\x2f\xfd':
+            return True
+        return False
+
+    csdict = {
+        'gzip': {
+            'check': _is_core_gziped,
+            'uncompress': ['gzip', '-d'],
+            'regex': r'.*gzip compressed data.*'
+        },
+        'zstd': {
+            'check': _is_core_zstded,
+            'uncompress': ['zstd', '-d'],
+            'regex': r'.*Zstandard compressed data.*'
+        }
+    }
+
+    def _get_compressed_type(dump_path):
+        for ck, cs in csdict.items():
+            if cs['check'](dump_path):
+                return ck
+        return None
+
+    def _looks_compressed(dump_out):
+        for cs in csdict.values():
+            if re.match(cs['regex'], dump_out):
+                return True
+        return False
+
+    def _uncompress_file(dump_path, cs_type):
+        if cs_type is None:
+            return None
+        # Construct a command to uncompress the file based on its type
+        try:
+            cmd = csdict[cs_type]['uncompress'] + [dump_path]
+            log.info(f'Uncompressing via {cmd} ...')
+            unc_output_path = dump_path.rsplit('.', 1)[0] + '.unc.log'
+            with open(unc_output_path, 'w') as _out:
+                unc = subprocess.Popen(cmd, stdout=_out, stderr=subprocess.STDOUT)
+                unc.wait()
+            # After uncompressing, the new file path is the original path
+            # without the compression suffix
+            uncompressed_path = dump_path.rsplit('.', 1)[0]
+            log.info(f'Uncompressed file path: {uncompressed_path}')
+            return uncompressed_path
+        except Exception as e:
+            log.info('Something went wrong while attempting to uncompress the file')
+            log.error(e)
+            return None
+
+    def _get_file_info(dump_path):
+        dump_info = subprocess.Popen(['file', dump_path],
+                                     stdout=subprocess.PIPE)
+        dump_out = dump_info.communicate()[0].decode()
+        return dump_out
+
     # Check for Coredumps:
     coredump_path = os.path.join(path, 'coredump')
     if os.path.isdir(coredump_path):
         log.info('Transferring binaries for coredumps...')
         for dump in os.listdir(coredump_path):
-            # Pull program from core file
+            # Pull program (that dropped the core) from the core file info
             dump_path = os.path.join(coredump_path, dump)
-            dump_info = subprocess.Popen(['file', dump_path],
-                                         stdout=subprocess.PIPE)
-            dump_out = dump_info.communicate()[0].decode()
-
-            # Parse file output to get program, Example output:
-            # 1422917770.7450.core: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, \
-            # from 'radosgw --rgw-socket-path /home/ubuntu/cephtest/apache/tmp.client.0/fastcgi_soc'
+            dump_out = _get_file_info(dump_path)
+            log.info(f' core looks like: {dump_out}')
 
-            if 'gzip' in dump_out:
+            if _looks_compressed(dump_out):
+                # If the core is compressed, recognise the type and uncompress it
+                cs_type = _get_compressed_type(dump_path)
                 try:
-                    log.info("core is compressed, try accessing gzip file ...")
-                    with gzip.open(dump_path, 'rb') as f_in, \
-                            tempfile.NamedTemporaryFile(mode='w+b') as f_out:
-                        shutil.copyfileobj(f_in, f_out)
-                        dump_info = subprocess.Popen(['file', f_out.name],
-                                                     stdout=subprocess.PIPE)
-                        dump_out = dump_info.communicate()[0].decode()
-                        log.info(f' core looks like: {dump_out}')
+                    log.info(f"core is compressed, try accessing {cs_type} file ...")
+                    uncompressed_path = _uncompress_file(dump_path, cs_type)
+                    if uncompressed_path is None:
+                        log.info(f"Could not uncompress {dump}, moving on ...")
+                        continue
                 except Exception as e:
                     log.info('Something went wrong while opening the compressed file')
                     log.error(e)
                     continue
+                dump_path = uncompressed_path
+                dump_out = _get_file_info(dump_path)
+                log.info(f' after uncompressing core looks like: {dump_out}')
             try:
                 dump_command = re.findall("from '([^ ']+)", dump_out)[0]
                 dump_program = dump_command.split()[0]
                 log.info(f' dump_program: {dump_program}')
             except Exception as e:
-                log.info("core doesn't have the desired format, moving on ...")
+                log.info(f"core {dump} doesn't have the desired format, moving on ...")
                 log.error(e)
                 continue
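For reference, the magic numbers that the new _is_core_gziped/_is_core_zstded helpers test for can be exercised in isolation. A minimal standalone sketch (the detect_compression name and sample file are illustrative, not part of the patch):

# Sketch of the magic-byte checks above: gzip files start with bytes
# 1f 8b, zstd frames with 28 b5 2f fd.
def detect_compression(path):
    with open(path, 'rb') as f:
        head = f.read(4)
    if head[:2] == b'\x1f\x8b':      # gzip magic number
        return 'gzip'
    if head == b'\x28\xb5\x2f\xfd':  # zstd frame magic number
        return 'zstd'
    return None

# e.g. detect_compression('core.ceph_test_neora.0.fb62b98.zst') -> 'zstd'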
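Similarly, the 'file'-output parsing that recovers the crashing program can be checked on its own; the sample string below is the radosgw example from the docstring:

import re

# Sample 'file' output for a core, taken from the docstring above
dump_out = ("1422917770.7450.core: ELF 64-bit LSB core file x86-64, "
            "version 1 (SYSV), SVR4-style, from 'radosgw --rgw-socket-path "
            "/home/ubuntu/cephtest/apache/tmp.client.0/fastcgi_soc'")

# The capture group stops at the first space or quote, so it yields the
# executable name from the quoted command line.
dump_command = re.findall("from '([^ ']+)", dump_out)[0]
print(dump_command)  # radosgw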
@@ -362,7 +454,23 @@ def fetch_binaries_for_coredumps(path, remote):
                 debug_path = '{debug_path}.debug'.format(debug_path=debug_path)
 
             remote.get_file(debug_path, coredump_path)
 
+            # If debug symbols were found, rename them to match the binary
+            debug_filename = os.path.basename(debug_path)
+            local_debug_path = os.path.join(coredump_path, debug_filename)
+            if os.path.exists(local_debug_path):
+                new_debug_path = os.path.join(
+                    coredump_path,
+                    dump_program.lstrip(os.path.sep) + '.debug'
+                )
+                os.rename(local_debug_path, new_debug_path)
+            # Execute gdb to get the backtrace and locals
+            get_backtraces_from_coredumps(coredump_path, dump_path,
+                                          dump_program, dump)
+            # Always compress the core file to save space
+            with open(dump_path, 'rb') as f_in, \
+                    gzip.open(dump_path + '.gz', 'wb') as f_out:
+                shutil.copyfileobj(f_in, f_out)
 
 
 def gzip_if_too_large(compress_min_size, src, tarinfo, local_path):
     if tarinfo.size >= compress_min_size:
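The gdb run added here is a fixed two-command batch ('set pagination 0' plus 'thread apply all bt full'). The docstring of get_backtraces_from_coredumps anticipates injecting commands from the test plan YAML; one possible shape for that, sketched with a hypothetical run_gdb_batch helper that is not part of this change:

import subprocess

# Hypothetical helper: run gdb in batch mode with a caller-supplied command
# list and capture all output (stdout and stderr) to a file.
def run_gdb_batch(program, core, commands, out_path):
    args = ['gdb', '--batch', '-ex', 'set pagination 0']
    for command in commands:
        args += ['-ex', command]
    args += [program, core]
    with open(out_path, 'w') as out:
        subprocess.run(args, stdout=out, stderr=subprocess.STDOUT)

# e.g. run_gdb_batch('bin/radosgw', 'coredump/1422917770.7450.core',
#                    ['thread apply all bt full', 'info locals'],
#                    'coredump/1422917770.7450.core.gdb.txt')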
@@ -521,6 +629,17 @@ def coredump(ctx, config):
                 if 'failure_reason' not in ctx.summary:
                     ctx.summary['failure_reason'] = \
                         'Found coredumps on {rem}'.format(rem=rem)
+                # Add the backtraces of the coredumps to the failure reason
+                coredump_path = os.path.join(archive_dir, 'coredump')
+                for dump in os.listdir(coredump_path):
+                    if dump.endswith('.gdb.txt'):
+                        with open(os.path.join(coredump_path, dump), 'r') as f:
+                            backtrace = f.read()
+                        ctx.summary['failure_reason'] += \
+                            '\n\nBacktrace from core {dump}:\n{bt}'.format(
+                                dump=dump,
+                                bt=backtrace
+                            )
 
 
 @contextlib.contextmanager
Here and in another method, could you please use typing for the arguments and method returns? Also, it would be great to have the argument descriptions in the docstring.
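For illustration, the requested annotations on the new helper might look like this (a sketch only; the parameter descriptions are inferred from context, not taken from the patch):

def get_backtraces_from_coredumps(coredump_path: str,
                                  dump_path: str,
                                  dump_program: str,
                                  dump: str) -> None:
    """
    Run gdb against a core file and save the backtrace next to it.

    :param coredump_path: directory holding the collected core files
    :param dump_path: full path to the (uncompressed) core file
    :param dump_program: path to the binary that produced the core
    :param dump: name of the core file, used to name the gdb output file
    """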