Using memcopy instruction for block data transfers - retrotruestory/M1DEV GitHub Wiki

Here's how to implement efficient block data transfers using Magic-1's memcopy instruction:

        .cseg
        .global _block_transfer
        .global _block_transfer_aligned

// block_transfer: copy a caller-supplied number of bytes with memcopy.
// C prototype: void block_transfer(void* dst, void* src, unsigned int count)
// Register roles at the memcopy: A = src, B = dst, C = byte count.
// A, B and C are saved on entry and restored before returning.
// NOTE(review): the argument offsets (12/14/16 from SP) are kept as found;
// confirm them against the frame size plus the three saved registers.
_block_transfer:
        enter   8           ; reserve the stack frame
        push    a           ; save A, B, C (popped in reverse order below)
        push    b
        push    c

        ld.16   c,16(sp)   ; C = count (third argument)
        ld.16   a,14(sp)   ; A = src   (second argument)
        ld.16   b,12(sp)   ; B = dst   (first argument)

        // Hand the whole copy to the hardware instruction
        memcopy            ; copy C bytes from (A) to (B)

        pop     c          ; restore in reverse push order
        pop     b
        pop     a
        leave               ; release the stack frame
        ret

// block_transfer_aligned: copy one fixed 512-byte block with memcopy.
// C prototype: void block_transfer_aligned(void* dst, void* src)
// The length is hard-wired to 512; no alignment check is performed here.
// Register roles at the memcopy: A = src, B = dst, C = 512.
// A, B and C are saved on entry and restored before returning. C is
// overwritten with the block size, so it is saved as well, which keeps
// this routine consistent with _block_transfer.
// NOTE(review): the argument offsets follow the same layout the routine
// used with two saved registers (6/8 from SP), shifted by one 16-bit word
// for the additional push; confirm against the Magic-1 calling convention.
_block_transfer_aligned:
        enter   4           ; reserve the stack frame
        push    a           ; save every register modified below
        push    b
        push    c           ; C is loaded with the block size, so save it too

        ld.16   b,8(sp)    ; B = dst (first argument)
        ld.16   a,10(sp)   ; A = src (second argument)
        ld.16   c,#512     ; C = fixed block size in bytes

        memcopy            ; copy C bytes from (A) to (B)

        pop     c          ; restore in reverse push order
        pop     b
        pop     a
        leave               ; release the stack frame
        ret

Create a C header:

#ifndef BLOCK_TRANSFER_H
#define BLOCK_TRANSFER_H

/*
 * Block transfer routines implemented in assembly on top of the
 * memcopy instruction.
 */

/* Copy `count` bytes from `src` to `dst`. */
void block_transfer(void* dst, void* src, unsigned int count);

/* Copy one fixed-size block of 512 bytes from `src` to `dst`. */
void block_transfer_aligned(void* dst, void* src);

#endif /* BLOCK_TRANSFER_H */

Example usage:

#include <stdio.h>
#include "block_transfer.h"

#define BLOCK_SIZE 512

/* Returns 1 when the first `count` bytes of `lhs` and `rhs` are identical, else 0. */
static int buffers_match(const char *lhs, const char *rhs, unsigned int count) {
    unsigned int i;

    for (i = 0; i < count; i++) {
        if (lhs[i] != rhs[i]) {
            return 0;
        }
    }
    return 1;
}

/*
 * Fills `dst` with the bitwise complement of `src`, so every byte differs
 * from the source and a copy that silently does nothing cannot pass.
 */
static void poison_buffer(char *dst, const char *src, unsigned int count) {
    unsigned int i;

    for (i = 0; i < count; i++) {
        dst[i] = (char)~src[i];
    }
}

/*
 * Exercises both transfer routines on a BLOCK_SIZE-byte ramp pattern and
 * checks the destination after each call.
 * Returns 0 when both copies match the source, 1 otherwise.
 */
int main(void) {
    char src_buffer[BLOCK_SIZE];
    char dst_buffer[BLOCK_SIZE];
    int failures = 0;
    int i;

    /* Initialize source buffer with a repeating 0..255 ramp */
    for (i = 0; i < BLOCK_SIZE; i++) {
        src_buffer[i] = (char)(i & 0xFF);
    }

    /* Test standard block transfer */
    poison_buffer(dst_buffer, src_buffer, BLOCK_SIZE);
    block_transfer(dst_buffer, src_buffer, BLOCK_SIZE);
    if (buffers_match(dst_buffer, src_buffer, BLOCK_SIZE)) {
        printf("block_transfer: OK\n");
    } else {
        printf("block_transfer: FAILED\n");
        failures++;
    }

    /* Test fixed-size transfer; re-poison so the previous copy cannot mask a failure */
    poison_buffer(dst_buffer, src_buffer, BLOCK_SIZE);
    block_transfer_aligned(dst_buffer, src_buffer);
    if (buffers_match(dst_buffer, src_buffer, BLOCK_SIZE)) {
        printf("block_transfer_aligned: OK\n");
    } else {
        printf("block_transfer_aligned: FAILED\n");
        failures++;
    }

    return failures ? 1 : 0;
}

Update Makefile:

# Toolchain and flags
CC = clcc
AS = m1_as
CFLAGS = -O2

# Build products
TARGET = test_transfer
OBJECTS = block_transfer.o test_transfer.o

# all and clean name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in the directory.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(CFLAGS) -o $@ $(OBJECTS)

# The test program includes the header, so rebuild it when the header changes.
test_transfer.o: block_transfer.h

%.o: %.s
	$(AS) -o $@ $<

%.o: %.c
	$(CC) $(CFLAGS) -c $<

clean:
	rm -f $(TARGET) *.o

Key features:

  1. Uses hardware memcopy instruction for optimal performance
  2. Provides both variable-size and fixed-size (512-byte) transfers
  3. Preserves registers properly
  4. Aligns with Magic-1's stack frame conventions

This implementation is particularly efficient for:

  • Disk I/O operations (512-byte sectors)
  • Memory-to-memory copies
  • DMA-like operations
  • Buffer transfers
⚠️ **GitHub.com Fallback** ⚠️