MapReduce join 总结(多对多内连接,左外连接,右外连接,全连接)
在reduce端做join:以TextPair做key,用TextPair.getSecond()区分记录来自哪个源文件,reduce端将其提取出来赋值给变量bz。下面的伪代码都假设同一个key下bz为0的表(即要放进内存list的表)的记录先于另一张表的记录到达reduce,因此需要按TextPair的第二个字段排序、只按第一个字段分组。
a join b
//a放进内存list
while(hasNext())
    if bz==0
        list.add()
    else {
        for clist : list
            write clist next
    }
a left outer join b
写法一:a放进内存list

int count=0;
//a放进内存list
while(hasNext())
    if bz==0
        list.add()
    else {
        count++; //记录该key在b表的记录数
        for clist : list
            write clist next
    }
if count==0 //如果b表没有记录,则输出null
    for clist : list
        write clist null

写法二:b放进内存list

//b放进内存list的写法
while(hasNext())
    if bz==0
        list.add() //b表的bz=0,b表放进list内存
    else {
        if list.size==0 //如果b表没有对应该key的记录,则输出null
            write next null
        else //有则循环输出
            for clist : list
                write next clist
    }
a full outer join b
int count=0;
//a放进内存list
while(hasNext())
    if bz==0
        list.add()
    else {
        count++; //记录该key在b表的记录数
        if list.size==0 //如果a表没有记录
            write null next
        else
            for clist : list
                write clist next
    }
if count==0 //如果b表没有记录,则输出null
    for clist : list
        write clist null
a right outer join b
int count=0;
//a放进内存list
while(hasNext())
    if bz==0
        list.add()
    else {
        count++; //记录该key在b表的记录数(右外连接中count并未被使用,可以省略)
        if list.size==0 //如果a表没有记录,则输出null
            write null next
        else
            for clist : list
                write clist next
    }