cbsm_origin_conv - Joejiong/buddy-mlir GitHub Wiki

// Number of 256-lane strips needed to cover a row of the given width.
#map1 = affine_map<(d0) -> (d0 ceildiv 256)>
module  {
  // 2-D convolution by coefficient broadcasting with 256-lane strip mining.
  //
  //   %arg0 : input image
  //   %arg1 : filter (kernel)
  //   %arg2 : output, accumulated in place
  //
  // For every output row `oh`, filter element (kh, kw) and 256-wide strip `s`:
  //   out[oh, s*256 .. s*256+255] +=
  //       img[oh + kh, kw + s*256 .. kw + s*256+255] * filter[kh, kw]
  //
  // The output is read-modify-written, so the caller is expected to have
  // initialised it (e.g. to zero) before the call.
  //
  // The last strip of a row may extend past the end of the row when the output
  // width is not a multiple of 256. Transfer ops are used for the strip
  // accesses so that lanes beyond the row end read the padding value and are
  // not written back, instead of touching memory outside the memref.
  func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    // Value substituted for lanes read past the end of a row. Those lanes are
    // never stored, so the choice of zero does not affect the result.
    %pad = arith.constant 0.0 : f32

    // Filter extent (rows, columns).
    %ker_h = memref.dim %arg1, %c0 : memref<?x?xf32>
    %ker_w = memref.dim %arg1, %c1 : memref<?x?xf32>

    // Output extent (rows, columns).
    %out_h = memref.dim %arg2, %c0 : memref<?x?xf32>
    %out_w = memref.dim %arg2, %c1 : memref<?x?xf32>

    affine.for %oh = 0 to %out_h {                 // output row
      affine.for %kh = 0 to %ker_h {               // filter row
        affine.for %kw = 0 to %ker_w {             // filter column
          // The coefficient and the image row do not depend on the strip
          // index, so compute them once per (oh, kh, kw).
          %coef = affine.load %arg1[%kh, %kw] : memref<?x?xf32>
          %coef_vec = vector.broadcast %coef : f32 to vector<256xf32>
          %img_row = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%oh, %kh)

          affine.for %strip = 0 to #map1(%out_w) { // 256-wide output strip
            // First output column of this strip, and the matching image column.
            %out_col = affine.apply affine_map<(d0) -> (d0 * 256)>(%strip)
            %img_col = affine.apply affine_map<(d0, d1) -> (d0 + d1 * 256)>(%kw, %strip)

            // img[oh + kh, kw + strip*256 ...]
            %img_vec = vector.transfer_read %arg0[%img_row, %img_col], %pad : memref<?x?xf32>, vector<256xf32>

            // Current partial sums: out[oh, strip*256 ...]
            %acc = vector.transfer_read %arg2[%oh, %out_col], %pad : memref<?x?xf32>, vector<256xf32>

            // acc + img * coefficient
            %res = vector.fma %img_vec, %coef_vec, %acc : vector<256xf32>

            // Write the updated partial sums back; out-of-bounds lanes are dropped.
            vector.transfer_write %res, %arg2[%oh, %out_col] : vector<256xf32>, memref<?x?xf32>
          }
        }
      }
    }
    return
  }
}
⚠️ **GitHub.com Fallback** ⚠️