Memory Fences - mhightower83/Arduino-ESP8266-misc GitHub Wiki
Memory Fences - What are they?
Terms such as memory fences, memory barriers, membar, and compiler barriers are often used to refer to the same thing. So far I have found this issue is often defined in abstract terms with weak examples. It gets even more complicated when reading a fully detailed description that encompasses prefetching, out-of-order execution, caching, and the rest of the multiprocessor realm. The writeup LINUX KERNEL MEMORY BARRIERS appears to be a comprehensive discussion of that; TL;DR. However, my area of interest here is the single-threaded, single-core scenario with possible interrupts. I think the subtopic on compiler barriers explicitly describes what I am looking for.
I think that there is more to be said here; however, for now, I will leave it with these references till another time.
If you read nothing else, this one is very much worth the time! Memory Ordering at Compile Time.
Another important topic to understand well: Implications of pure and constant functions
So far it appears that a memory fence stops the compiler from reusing registers that hold copies of memory values loaded before the fence. After the fence, those values are reloaded from memory.
Compiler Memory Barriers:
- function calls are natural barriers provided they are not: inline, pure or const
asm volatile("":::"memory")
Initial References: https://people.cs.pitt.edu/~xianeizhang/notes/cpp11_mem.html https://github.com/esp8266/Arduino/issues/615 - more to do with IRQs enable/disable
Simple test
// Test function: reads *a, raises the interrupt level, copies *a to *b,
// restores the saved PS value, and returns the value read first.
//   a - pointer to the int that is read (it appears twice in the source:
//       once before xt_rsil() and once between xt_rsil() and xt_wsr_ps()).
//   b - pointer to the int that receives *a.
// Returns the value of *a that was read before xt_rsil().
// NOTE(review): presumably the interest is whether an optimizing build reuses
// the first read of *a for "*b = *a"; the -O0 listing that follows performs
// two separate loads - confirm against an optimized build.
int just4Fun(int *a, int *b) {
uint32_t old_ps;
int result = *a;            // first read of *a, before the interrupt level is raised
old_ps = xt_rsil(15);       // set interrupt level to 15; keep the previous PS value
*b = *a;                    // second read of *a, then store through b
xt_wsr_ps(old_ps);          // write the saved PS value back
return result;              // value from the first read
}
Compiled to Xtensa assembly with no optimization, -O0
// just4Fun(int *a, int *b) - compiler output, Xtensa, -O0.
// In:   a2 = a (arg1), a3 = b (arg2)
// Out:  a2 = result
// Uses: a15 as base pointer for the frame (saved and restored here)
// Frame: 48 bytes, offsets relative to a15 (== sp after the prologue)
//    +0  result
//    +4  temporary holding the PS value produced by rsil
//    +8  old_ps
//   +16  a
//   +20  b
//   +44  saved a15
// Every C statement goes through the stack, so *a is loaded from memory
// twice: once for "result" and once more for "*b = *a".
_Z8just4FunPiS_:
addi sp, sp, -48 // sp = sp - 48; // Reserve space on stack for variables
s32i.n a15, sp, 44 // *((int*)&sp[44]) = a15; // Backup reg a15 to stack
mov.n a15, sp // a15 = sp; // a15 will be used as a base pointer
s32i.n a2, a15, 16 // *((int*)&a15[16]) = a2; // save "a" (arg1) on the stack
s32i.n a3, a15, 20 // *((int*)&a15[20]) = a3; // save "b" (arg2) on the stack
# int result = *a;
l32i.n a2, a15, 16 // a2 = *((int*)&a15[16]); // Load pointer "a" from stack into register a2
l32i.n a2, a2, 0 // a2 = *(int*)a2; // effectively a2 = *a; (first read of *a)
s32i.n a2, a15, 0 // *(int*)a15 = a2; // Save a2 (result) to stack
# old_ps = xt_rsil(15);
rsil a2, 15 // a2 = ps;
// ps.intlevel = 15; // Disable interrupts
s32i.n a2, a15, 4 // *((int*)&a15[4]) = a2; // Save previous PS value (read by rsil) to stack
l32i.n a2, a15, 4 // a2 = *((int*)&a15[4]); // Load previous ps into a2
s32i.n a2, a15, 8 // *((int*)&a15[8]) = a2; // Store it again in a new stack location (old_ps)
# *b = *a;
l32i.n a2, a15, 16 // a2 = *((int*)&a15[16]); // get pointer "a" (arg1) from stack
l32i.n a3, a2, 0 // a3 = *(int*)a2; // load value pointed to by "a" (second read of *a)
l32i.n a2, a15, 20 // a2 = *((int*)&a15[20]); // get pointer "b" (arg2) from stack
s32i.n a3, a2, 0 // *(int*)a2 = a3; // save value to memory pointed to by a2 (*b) +0
# xt_wsr_ps(old_ps);
l32i.n a2, a15, 8 // a2 = *((int*)&a15[8]); // Get back PS to restore INTLEVEL
wsr a2, ps // ps = a2; // Restore interrupts
isync // synchronize after the write to PS
# return result;
l32i.n a2, a15, 0 // a2 = *(int*)a15; // get return value from stack
mov.n sp, a15 // sp = a15; // restore sp
l32i.n a15, sp, 44 // a15 = *((int*)&sp[44]); // restore a15
addi sp, sp, 48 // sp = sp + 48; // restore sp for return
ret.n // return to caller; result is in a2