Reading Files in Slices - alipay/rdf-file GitHub Wiki
For large files, the usual approach is to split the file into slices first and then have each node in the cluster read its own portion of the data. The examples below use the following sample data file and template.
Sample data file (data_split.txt):
100|300.03
seq_0|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_2|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_3|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_4|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_5|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_6|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_7|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_8|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_9|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_10|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
OFDCFEND|20131109|100
Template (template3.json):
{
"head":[
"totalCount|总笔数|Required|Long",
"totalAmount|总金额|BigDecimal|Required"
],
"body":[
"seq|流水号",
"instSeq|基金公司订单号|Required",
"gmtApply|订单申请时间|Date:yyyy-MM-dd HH:mm:ss",
"date|普通日期|Date:yyyyMMdd",
"dateTime|普通日期时间|Date:yyyyMMdd HH:mm:ss",
"applyNumber|普通数字|BigDecimal",
"amount|金额|BigDecimal",
"age|年龄|Integer",
"longN|长整型|Long",
"bol|布尔值|Boolean",
"memo|备注"
],
"tail": [
"fileEnd|数据文件尾部字符",
"date|普通日期|Date:yyyyMMdd",
"amount|金额|BigDecimal"
],
"protocol":"SP"
}
"SP" is the component's built-in protocol.
- Splitting by file structure (head / body / tail)
// Imports used by this example. The package locations follow the rdf-file core module
// layout (interfaces / model); verify them against the version you are using.
import java.io.File;
import java.util.HashMap;
import java.util.Map;

import com.alipay.rdf.file.interfaces.FileFactory;
import com.alipay.rdf.file.interfaces.FileReader;
import com.alipay.rdf.file.interfaces.FileSplitter;
import com.alipay.rdf.file.model.FileConfig;
import com.alipay.rdf.file.model.FileSlice;
import com.alipay.rdf.file.model.StorageConfig;

String filePath = File.class.getResource("/reader/sp/data/data_split.txt").getPath();
FileConfig config = new FileConfig(filePath, "/reader/sp/template/template3.json", new StorageConfig("nas"));
// Create the file splitter
FileSplitter splitter = FileFactory.createSplitter(config.getStorageConfig());

// Get the head slice
FileSlice headSlice = splitter.getHeadSlice(config);
// Read the head slice
FileConfig headConfig = config.clone();
headConfig.setPartial(headSlice.getStart(), headSlice.getLength(), headSlice.getFileDataType());
FileReader headReader = FileFactory.createReader(headConfig);
try {
    Map<String, Object> head = headReader.readHead(HashMap.class);
    System.out.println(head);
} finally {
    headReader.close();
}

// Get the body slice
FileSlice bodySlice = splitter.getBodySlice(config);
// Read the body slice row by row
FileConfig bodyConfig = config.clone();
bodyConfig.setPartial(bodySlice.getStart(), bodySlice.getLength(), bodySlice.getFileDataType());
FileReader bodyReader = FileFactory.createReader(bodyConfig);
try {
    Map<String, Object> row = null;
    while (null != (row = bodyReader.readRow(HashMap.class))) {
        System.out.println(row);
    }
} finally {
    bodyReader.close();
}

// Get the tail slice
FileSlice tailSlice = splitter.getTailSlice(config);
// Read the tail slice
FileConfig tailConfig = config.clone();
tailConfig.setPartial(tailSlice.getStart(), tailSlice.getLength(), tailSlice.getFileDataType());
FileReader tailReader = FileFactory.createReader(tailConfig);
try {
    Map<String, Object> tail = tailReader.readTail(HashMap.class);
    System.out.println(tail);
} finally {
    tailReader.close();
}
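Because the head declares the expected totals (totalCount, totalAmount in the template above), a sliced read can be cross-checked once the body has been consumed. The sketch below is illustrative glue code, not an rdf-file API: it reuses only the config, headSlice and bodySlice variables and the readHead/readRow calls from the example above, and assumes the parsed totalCount comes back as a Number (the template declares it as Long). On the shortened sample data the declared count of 100 will of course not match, so run it against a complete file.

// Illustrative consistency check (not part of rdf-file): compare the row count
// declared in the head with the number of rows actually read from the body slice.
FileConfig headCheck = config.clone();
headCheck.setPartial(headSlice.getStart(), headSlice.getLength(), headSlice.getFileDataType());
FileReader headCheckReader = FileFactory.createReader(headCheck);
long declared;
try {
    Map<String, Object> h = headCheckReader.readHead(HashMap.class);
    declared = ((Number) h.get("totalCount")).longValue();   // "totalCount" is defined in the template head
} finally {
    headCheckReader.close();
}

FileConfig bodyCheck = config.clone();
bodyCheck.setPartial(bodySlice.getStart(), bodySlice.getLength(), bodySlice.getFileDataType());
FileReader bodyCheckReader = FileFactory.createReader(bodyCheck);
long actual = 0;
try {
    while (null != bodyCheckReader.readRow(HashMap.class)) {
        actual++;
    }
} finally {
    bodyCheckReader.close();
}

if (declared != actual) {
    throw new IllegalStateException("Row count mismatch: head declares " + declared + " rows, body slice contains " + actual);
}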
- Splitting the body by size
// (Same imports as the previous example, plus java.util.List.)
String filePath = File.class.getResource("/reader/sp/data/data_split.txt").getPath();
FileConfig config = new FileConfig(filePath, "/reader/sp/template/template3.json", new StorageConfig("nas"));
// Create the file splitter
FileSplitter splitter = FileFactory.createSplitter(config.getStorageConfig());
// Split the body into slices with a target size of 256
List<FileSlice> slices = splitter.getBodySlices(config, 256);
System.out.println(slices.size());
// Read each slice independently
for (FileSlice slice : slices) {
    FileConfig sliceConfig = config.clone();
    sliceConfig.setPartial(slice.getStart(), slice.getLength(), slice.getFileDataType());
    FileReader reader = FileFactory.createReader(sliceConfig);
    try {
        Map<String, Object> row = null;
        while (null != (row = reader.readRow(HashMap.class))) {
            System.out.println(row);
        }
    } finally {
        reader.close();
    }
}
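Each FileSlice is self-contained: its start offset, length, and data type, together with the shared file path and template, are all a worker needs to rebuild its own FileConfig. That is what makes the cluster-wide read described at the top workable. Below is a minimal local sketch that stands in for the cluster with a thread pool; the executor, counters, and variable names are illustrative and not part of rdf-file, and it additionally needs java.util.ArrayList and java.util.concurrent imports.

// Illustrative only: one task per body slice, a thread pool standing in for cluster nodes.
// In a real deployment each node would receive just the slice's start/length plus the
// shared file path and template, and build its own FileConfig locally.
// (Assumes the enclosing method declares "throws Exception", since Future.get() throws checked exceptions.)
final FileConfig baseConfig = config;
ExecutorService pool = Executors.newFixedThreadPool(4);
List<Future<Long>> results = new ArrayList<Future<Long>>();
for (final FileSlice slice : slices) {
    results.add(pool.submit(new Callable<Long>() {
        @Override
        public Long call() throws Exception {
            FileConfig sliceConfig = baseConfig.clone();
            sliceConfig.setPartial(slice.getStart(), slice.getLength(), slice.getFileDataType());
            FileReader reader = FileFactory.createReader(sliceConfig);
            long rows = 0;
            try {
                while (null != reader.readRow(HashMap.class)) {
                    rows++;
                }
            } finally {
                reader.close();
            }
            return rows;
        }
    }));
}
long total = 0;
for (Future<Long> result : results) {
    total += result.get();
}
pool.shutdown();
System.out.println("rows read across all slices: " + total);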