|
求助啊
用rhadoop处理数据的时候发现数据的某些字段被莫名其妙的改变了
analysislog<-function(input,output,pattern="\\|"){
mapper<-function(.,records){
names(records)<-c("src_ip","dst_ip","src_port","dst_port","app_type","app_id","up_bytes","down_bytes",
"proto","ip_ver","begin_time","end_time")
data<-records[-1,]
keyval("key",data)
}
reducer<-function(key,data){
keyval(key,data)
}
mapreduce(input = input ,output = output,input.format=make.input.format("csv",sep="|"),
map = mapper, reduce = reducer,combine = T#,backend.parameters = list(hadoop = list(D ="mapred.reduce.tasks=8"))
)
}
analysislog("/log_test","/logtmp","\\|")
这样不做任何处理 只是把文件输出后 发现记录的某些字段改变了
原始数据:
root@ubuntu22:~/log# find app_stat-20140117-000571.r |xargs grep "000080fe000000000edd5679f93fdcfa"
000080fe000000000edd5679f93fdcfa|000002ff00000000000000000c000000|27655|27655|0|0|10906|0|17|6|2014.01.17 01:19:00|2014.01.17 01:19:50
000080fe000000000edd5679f93fdcfa|000002ff00000000000000000c000000|27655|27655|0|0|11114|0|17|6|2014.01.17 01:19:50|2014.01.17 01:19:54
000080fe000000000edd5679f93fdcfa|000002ff000000000000000003000100|5568|60180|0|0|336|0|17|6|2014.01.17 01:19:00|2014.01.17 01:20:01
000080fe000000000edd5679f93fdcfa|000002ff00000000000000000c000000|27655|27655|0|0|10923|0|17|6|2014.01.17 01:19:55|2014.01.17 01:20:02
000080fe000000000edd5679f93fdcfa|000002ff00000000000000000c000000|3821|27655|0|0|3600|0|17|6|2014.01.17 01:19:57|2014.01.17 01:20:13
经过rhadoop后:
hadoop@ubuntu22:/home/hadoop/Rhadoop$ find output |xargs grep "000080fe000000000edd5679f93fdcfa"
"000080fe000000000edd5679f93fdcfa","000002ff00000000000000000c000000",27655,27655,0,0,10923,0,17,6,"2014.01.17 01:19:55","2014.01.17 01:20:02"
"000080fe000000000edd5679f93fdcfa","000002ff00000000000000000c000000",3821,27655,0,0,3600,0,17,6,"2014.01.17 01:19:57","2014.01.17 01:20:13"
"000080fe000000000edd5679f93fdcfa","000002ff00000000000000000c000000",180,92,1,1,30,1,1,2,"2014.01.17 01:19:00","2014.01.17 01:19:50"
"000080fe000000000edd5679f93fdcfa","000002ff00000000000000000c000000",180,92,1,1,37,1,1,2,"2014.01.17 01:19:50","2014.01.17 01:19:54"
"000080fe000000000edd5679f93fdcfa","000002ff000000000000000003000100",494,185,1,1,238,1,1,2,"2014.01.17 01:19:00","2014.01.17 01:20:01"
求大神指点啊!!!!! |
|
|