Memory Fences - mhightower83/Arduino-ESP8266-misc GitHub Wiki

Memory Fences - What are they?

Terms such as Memory Fences, Memory Barriers, membar, and Compiler Barriers are often used to refer to the same thing. So far I have found this topic is usually defined in abstract terms with weak examples. It gets even more complicated when reading a fully detailed description that also covers prefetching, out-of-order execution, caching, and the rest of the multiprocessor realm. The writeup LINUX KERNEL MEMORY BARRIERS appears to be a comprehensive discussion of all that, but it was too long for me to read in full (TL;DR). However, my area of interest here is the single-threaded, single-core scenario with possible interrupts. I think the subtopic on compiler barriers explicitly describes what I am looking for.

I think that there is more to be said here; however, for now, I will leave it with these references till another time.

If you read nothing else, this one is very much worth the time! Memory Ordering at Compile Time.

Another important topic to understand well: Implications of pure and constant functions

So far it appears that a memory fence prevents the compiler from reusing memory values that it loaded into registers before the fence. After the fence, those values are reloaded from memory.

Compiler Memory Barriers:

  • function calls are natural barriers, provided the called function is not inline, pure, or const
  • asm volatile("":::"memory")

Initial References: https://people.cs.pitt.edu/~xianeizhang/notes/cpp11_mem.html https://github.com/esp8266/Arduino/issues/615 - more to do with IRQs enable/disable

Simple test

// Simple test function for inspecting the generated code: *a is read once
// before interrupts are disabled and once more while they are disabled.
//   a - pointer to the value that is read twice
//   b - pointer that receives the value from the second read of *a
// Returns the value obtained by the first read of *a.
int just4Fun(int *a, int *b) {
  uint32_t old_ps;           // PS value handed back by xt_rsil(), restored below
  int result = *a;           // first read of *a, taken before the interrupt level is raised
  old_ps = xt_rsil(15);      // read PS and set its interrupt level to 15 (interrupts disabled)
  *b = *a;                   // second read of *a, copied to *b
  xt_wsr_ps(old_ps);         // write the saved value back to PS
  return result;
}

Compiled to Xtensa assembly with no optimization, -O0

# just4Fun(int *a, int *b) as compiled at -O0 (_Z8just4FunPiS_ is its C++ mangled name).
# In:  a2 = a (arg1), a3 = b (arg2)
# Out: a2 = result
# Stack frame, 48 bytes, addressed through a15 (byte offsets):
#    0  result
#    4  temporary holding the value read by rsil
#    8  old_ps
#   16  a
#   20  b
#   44  saved a15
_Z8just4FunPiS_:
        addi    sp, sp, -48      // sp = sp - 48;           // Reserve space on stack for variables
        s32i.n  a15, sp, 44      // *((int*)&sp[44]) = a15; // Backup reg a15 to stack
        mov.n   a15, sp          // a15 = sp;               // a15 will be used as a base pointer
        s32i.n  a2, a15, 16      // *((int*)&a15[16]) = a2; // save "a" (arg1) on the stack
        s32i.n  a3, a15, 20      // *((int*)&a15[20]) = a3; // save "b" (arg2) on the stack
        l32i.n  a2, a15, 16      // a2 = *((int*)&a15[16]); // Load pointer "a" from stack into register a2
        l32i.n  a2, a2, 0        // a2 = *(int*)a2;         // effectively a2 = *a;
        s32i.n  a2, a15, 0       // *(int*)a15 = a2;        // Save a2 (result) to stack

#   old_ps = xt_rsil(15);
        rsil    a2, 15           // a2 = ps;
                                 // ps.intlevel = 15;       // Disable interrupts
        s32i.n  a2, a15, 4       // *((int*)&a15[4]) = a2;  // Save previous PS value in a temporary stack slot
        l32i.n  a2, a15, 4       // a2 = *((int*)&a15[4]);  // Reload previous PS value from the temporary slot
        s32i.n  a2, a15, 8       // *((int*)&a15[8]) = a2;  // Store it in old_ps (frame offset 8)

#   *b = *a;
        l32i.n  a2, a15, 16      // a2 = *((int*)&a15[16]);  // get pointer "a" (arg1) from stack
        l32i.n  a3, a2, 0        // a3 = *(int*)a2;          // load value pointed to by "a" (*a is read again here)
        l32i.n  a2, a15, 20      // a2 = *((int*)&a15[20]);  // get pointer "b" (arg2) from stack
        s32i.n  a3, a2, 0        // *(int*)a2 = a3;          // save value to memory pointed to by a2 (*b) +0

#   xt_wsr_ps(old_ps);
        l32i.n  a2, a15, 8       // a2 = *((int*)&a15[8]);   // Get back old_ps to restore INTLEVEL
        wsr     a2, ps           // ps = a2;                 // Restore interrupts
        isync                    // emitted with the PS write; presumably makes the new PS take effect before continuing - confirm in the Xtensa ISA manual

#   return result;
        l32i.n  a2, a15, 0       // a2 = *(int*)a15;         // get return value from stack
        mov.n   sp, a15          // sp = a15;                // restore sp
        l32i.n  a15, sp, 44      // a15 = *((int*)&sp[44]);  // restore a15
        addi    sp, sp, 48       // sp = sp + 48;            // restore sp for return
        ret.n                    // return to caller with result in a2