fix(dmac): Use inline asm for compiler fences in DMAC

jbeaurivage · jbeaurivage · commit f24948eec53d · 2026-01-25T00:04:05.000-05:00
Signed-off-by: Justin Beaurivage <code@beaurivage.io>
diff --git a/hal/src/dmac/async_api.rs b/hal/src/dmac/async_api.rs
@@ -1,7 +1,6 @@
 //! APIs for async DMAC operations.
 
 use atsamd_hal_macros::hal_cfg;
-use core::sync::atomic;
 
 use crate::{
     async_hal::interrupts::{DMAC, Handler},
@@ -55,11 +54,7 @@ impl Handler<DMAC> for InterruptHandler {
                         core::hint::spin_loop();
                     }
 
-                    // Prevent the compiler from re-ordering read/write
-                    // operations beyond this fence.
-                    // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
-                    atomic::fence(atomic::Ordering::Acquire); // ▼
-
+                    super::channel::release_fence(); // ▲
                     WAKERS[pend_channel as usize].wake();
                 }
             }
@@ -112,10 +107,7 @@ impl Handler<DMAC> for InterruptHandler {
                     core::hint::spin_loop();
                 }
 
-                // Prevent the compiler from re-ordering read/write
-                // operations beyond this fence.
-                // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
-                atomic::fence(atomic::Ordering::Acquire); // ▼
+                super::channel::acquire_fence(); // ▼
 
                 WAKERS[channel].wake();
             }
diff --git a/hal/src/dmac/channel/mod.rs b/hal/src/dmac/channel/mod.rs
@@ -34,7 +34,6 @@
 #![allow(unused_braces)]
 
 use core::marker::PhantomData;
-use core::sync::atomic;
 
 use atsamd_hal_macros::{hal_cfg, hal_macro_helper};
 
@@ -271,10 +270,7 @@ impl<Id: ChId, S: Status> Channel<Id, S> {
     /// Enable the transfer, and emit a compiler fence.
     #[inline]
     fn _enable_private(&mut self) {
-        // Prevent the compiler from re-ordering read/write
-        // operations beyond this fence.
-        // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
-        atomic::fence(atomic::Ordering::Release); // ▲
+        release_fence(); // ▲
         self.regs.chctrla.modify(|_, w| w.enable().set_bit());
     }
 
@@ -288,10 +284,7 @@ impl<Id: ChId, S: Status> Channel<Id, S> {
             core::hint::spin_loop();
         }
 
-        // Prevent the compiler from re-ordering read/write
-        // operations beyond this fence.
-        // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
-        atomic::fence(atomic::Ordering::Acquire); // ▼
+        acquire_fence(); // ▼
     }
 
     /// Returns whether or not the transfer is complete.
@@ -882,3 +875,33 @@ pub(crate) unsafe fn write_descriptor<Src: Buffer, Dst: Buffer<Beat = Src::Beat>
         btctrl,
     };
 }
+
+/// Memory barrier for the acquire (▼) side of a DMA transfer: prevents read/write
+/// operations that follow this call from being re-ordered before it.
+/// (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
+#[inline(always)]
+pub(super) fn acquire_fence() {
+    // TODO: atomic fences alone do not seem to guarantee that memory accesses stay
+    // ordered around DMA, so an inline asm `dmb` is emitted instead; revisit if that changes.
+    // (see https://users.rust-lang.org/t/compiler-fence-dma/132027)
+    // SAFETY: `dmb` takes no operands and does not touch the stack or the flags. `nomem` is
+    // deliberately NOT set, so the compiler must assume the asm block accesses memory.
+    unsafe {
+        core::arch::asm!("dmb", options(nostack, preserves_flags));
+    }
+}
+
+/// Memory barrier for the release (▲) side of a DMA transfer: prevents read/write
+/// operations that precede this call from being re-ordered after it.
+/// (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
+#[inline(always)]
+pub(super) fn release_fence() {
+    // TODO: atomic fences alone do not seem to guarantee that memory accesses stay
+    // ordered around DMA, so an inline asm `dmb` is emitted instead; revisit if that changes.
+    // (see https://users.rust-lang.org/t/compiler-fence-dma/132027)
+    // SAFETY: `dmb` takes no operands and does not touch the stack or the flags. `nomem` is
+    // deliberately NOT set, so the compiler must assume the asm block accesses memory.
+    unsafe {
+        core::arch::asm!("dmb", options(nostack, preserves_flags));
+    }
+}
diff --git a/hal/src/dmac/channel/reg.rs b/hal/src/dmac/channel/reg.rs
@@ -18,10 +18,9 @@ use paste::paste;
 
 use crate::pac::{
     self, Dmac, Peripherals,
-    dmac::{Busych, Intstatus, Pendch, Swtrigctrl},
     dmac::{
-        busych::BusychSpec, intstatus::IntstatusSpec, pendch::PendchSpec,
-        swtrigctrl::SwtrigctrlSpec,
+        Busych, Intstatus, Pendch, Swtrigctrl, busych::BusychSpec, intstatus::IntstatusSpec,
+        pendch::PendchSpec, swtrigctrl::SwtrigctrlSpec,
     },
 };
 
@@ -324,9 +323,6 @@ impl<Id: ChId> Drop for RegisterBlock<Id> {
             core::hint::spin_loop();
         }
 
-        // Prevent the compiler from re-ordering read/write
-        // operations beyond this fence.
-        // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)
-        core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire); // ▼
+        crate::dmac::channel::acquire_fence(); // ▼
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,6 @@`
`1`	`1`	`//! APIs for async DMAC operations.`
`2`	`2`
`3`	`3`	`use atsamd_hal_macros::hal_cfg;`
`4`		`-use core::sync::atomic;`
`5`	`4`
`6`	`5`	`use crate::{`
`7`	`6`	`async_hal::interrupts::{DMAC, Handler},`
`@@ -55,11 +54,7 @@ impl Handler<DMAC> for InterruptHandler {`
`55`	`54`	`core::hint::spin_loop();`
`56`	`55`	`}`
`57`	`56`
`58`		`- // Prevent the compiler from re-ordering read/write`
`59`		`- // operations beyond this fence.`
`60`		`- // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)`
`61`		`- atomic::fence(atomic::Ordering::Acquire); // ▼`
`62`		`-`
	`57`	`+ super::channel::release_fence(); // ▲`
`63`	`58`	`WAKERS[pend_channel as usize].wake();`
`64`	`59`	`}`
`65`	`60`	`}`
`@@ -112,10 +107,7 @@ impl Handler<DMAC> for InterruptHandler {`
`112`	`107`	`core::hint::spin_loop();`
`113`	`108`	`}`
`114`	`109`
`115`		`- // Prevent the compiler from re-ordering read/write`
`116`		`- // operations beyond this fence.`
`117`		`- // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations)`
`118`		`- atomic::fence(atomic::Ordering::Acquire); // ▼`
	`110`	`+ super::channel::acquire_fence(); // ▼`
`119`	`111`
`120`	`112`	`WAKERS[channel].wake();`
`121`	`113`	`}`