pw_conv_2d_nhwc_hwcf_cbsm - Joejiong/buddy-mlir GitHub Wiki

TODO problems:

  • map kc to map0
  • error: 'affine.vector_load' op affine map num results must equal memref rank %6 = affine.vector_load %input[%on, 0+%oh, 0+%ow_256*256] : memref<?x?x?x?xf32>, vector<256xf32>
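  • entry point not found (the module shown below defines only @pointwise_conv_2d_nhwc_hwcf and no @main function, which `-e main` expects) when running: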
/Workspace/buddy-mlir/llvm/build/bin/mlir-opt 7_loop_pw_conv_vec_template.mlir -convert-linalg-to-loops -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | /Workspace/buddy-mlir/llvm/build/bin/mlir-cpu-runner -e main -entry-point-result=void -shared-libs=/Workspace/buddy-mlir/llvm/build/lib/libmlir_runner_utils.so

The `affine.vector_load` rank error above was solved by supplying 4 indices, one per dimension of the rank-4 memref.

#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> (d0 ceildiv 256)>
module  {
  // Pointwise (1x1) 2-D convolution: accumulates input * filter into %output.
  //
  // Layouts, as implied by the function name and by the order of the subscripts
  // used in the loop nest below:
  //   %input  : [n, h, w, c]
  //   %filter : [kh, kw, c, f]   (only element [0, 0, c, f] is read)
  //   %output : [n, h, w, f]
  //
  // The result is accumulated into %output, so the caller is responsible for
  // initialising %output before the call.
  func @pointwise_conv_2d_nhwc_hwcf(%input: memref<?x?x?x?xf32>, %filter: memref<?x?x?x?xf32>, %output: memref<?x?x?x?xf32>) {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %c2 = arith.constant 2 : index
    %c3 = arith.constant 3 : index

    // Number of input channels: dimension 2 of the filter, the position indexed
    // by %kc below. The filter's first two dimensions are not queried because
    // the kernel always reads the filter at [0, 0, kc, of].
    %KC = memref.dim %filter, %c2 : memref<?x?x?x?xf32> // FC

    // Loop extents taken from the output memref. Each extent must come from the
    // dimension that the matching loop variable indexes in
    // %output[%on, %oh, ow, %of]: N = dim 0, H = dim 1, W = dim 2, F = dim 3.
    %ON = memref.dim %output, %c0 : memref<?x?x?x?xf32> // ON
    %OH = memref.dim %output, %c1 : memref<?x?x?x?xf32> // OH
    %OW = memref.dim %output, %c2 : memref<?x?x?x?xf32> // OW
    %OF = memref.dim %output, %c3 : memref<?x?x?x?xf32> // OF

    affine.for %on = #map0(%c0) to #map0(%ON) {           // on : 0 .. ON (batch)

        affine.for %of = #map0(%c0) to #map0(%OF) {          // of : 0 .. OF (output channels)

            affine.for %kc = #map0(%c0) to #map0(%KC) {          // kc : 0 .. KC (input channels)

                affine.for %oh = #map0(%c0) to #map0(%OH) {       // oh : 0 .. OH (output rows)

                    // ow_256 : 0 .. ceil(OW / 256), one iteration per 256-wide strip.
                    // NOTE(review): the strip count is rounded up, so when OW is not a
                    // multiple of 256 the last strip starts a 256-element access that
                    // extends past OW; no tail handling is visible here - confirm.
                    affine.for %ow_256 = #map0(%c0) to #map1(%OW) {
                        // Load the single filter coefficient filter[0, 0, kc, of]
                        // (for a given kc there is exactly one value per of).
                        %4 = affine.vector_load %filter[0,0,%kc,%of] : memref<?x?x?x?xf32>, vector<1xf32>

                        // Broadcast that coefficient to all 256 lanes.
                        %5 = vector.broadcast %4 : vector<1xf32> to vector<256xf32>

                        // Load 256 input elements starting at input[on, oh, ow_256*256, kc].
                        // NOTE(review): per the affine dialect documentation a 1-D
                        // vector_load reads a slice that is contiguous along the
                        // innermost memref dimension, which here is the channel
                        // dimension rather than W - confirm this is the intended
                        // access pattern.
                        %6 = affine.vector_load %input[%on, 0+%oh, 0+%ow_256*256, %kc] : memref<?x?x?x?xf32>, vector<256xf32>

                        // Load the current accumulator values starting at
                        // output[on, oh, ow_256*256, of].
                        %7 = affine.vector_load %output[%on, %oh, %ow_256 * 256, %of] : memref<?x?x?x?xf32>, vector<256xf32>

                        // acc = input * broadcast(filter) + acc
                        %8 = vector.fma %6, %5, %7 : vector<256xf32>

                        // Write the updated accumulator back to the same output slice.
                        affine.vector_store %8, %output[%on, %oh, %ow_256 * 256, %of] : memref<?x?x?x?xf32>, vector<256xf32>
                    }
                }
            }
        }
    }
    return
  }
}
⚠️ **GitHub.com Fallback** ⚠️