mapreduce - luckystar1992/ERM GitHub Wiki
# -*- coding: utf-8 -*-
# Streaming mapper.
#
# Reads one JSON document per line from stdin and, for every record that can
# be parsed, writes "<_id> <follow_count> <user_id> <screen_name>" to stdout.
# Records that cannot be processed are reported on stderr by their 0-based
# line index and skipped, so one bad record never aborts the whole job.
import json
import sys

try:
    # Python 2 only: make utf-8 the implicit str/unicode codec so that
    # non-ASCII screen names can be written to stdout.
    reload(sys)
    sys.setdefaultencoding("utf-8")
except (NameError, AttributeError):
    # Python 3 has no builtin reload()/setdefaultencoding(); nothing to do.
    pass


def extract_user_id(user_id_object):
    """Return the user id as an integer.

    The id is read either from a ``{"$numberLong": "<digits>"}`` wrapper or
    from a plain integer / numeric string.

    Raises:
        TypeError: the value is missing or is not an id-like type.
        ValueError: the value is a string that is not a valid integer.
    """
    if isinstance(user_id_object, dict):
        user_id_object = user_id_object.get("$numberLong")
    # int() would silently accept these; they are not valid ids.
    if isinstance(user_id_object, (bool, float)):
        raise TypeError("unsupported user id: %r" % (user_id_object,))
    # int() raises TypeError for None/list/dict and ValueError for bad text,
    # so an unusable id can never leak a value from a previous record.
    return int(user_id_object)


def map_record(line):
    """Convert one JSON input line into the mapper's output line.

    Raises whatever the parsing steps raise (malformed JSON, missing
    ``_id``/``user`` objects, non-numeric follow count, bad user id);
    the caller decides how to report it.
    """
    json_object = json.loads(line.strip())

    # Id of the crawled document.
    _id = json_object.get("_id").get("$oid")

    # User information.
    user_object = json_object.get("user")
    user_follow_count = user_object.get("follow_count")
    user_id = extract_user_id(user_object.get("id"))
    screen_name = user_object.get("screen_name")

    return "%s %d %d %s\n" % (_id, user_follow_count, user_id, screen_name)


def run_mapper(lines, out, err):
    """Map every line of *lines*, writing results to *out* and errors to *err*."""
    for index, line in enumerate(lines):
        try:
            out.write(map_record(line))
        except Exception:
            # Per-record boundary: keep the job running (best effort) but
            # let KeyboardInterrupt/SystemExit propagate.
            err.write("[Map Error] line:%d\n" % (index))


def main():
    """Run the mapper over the process's standard streams."""
    run_mapper(sys.stdin, sys.stdout, sys.stderr)


if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
# Streaming reducer.
#
# Reads space-separated records from stdin, collects the distinct user ids
# and writes "User Count: <n>" to stdout. Lines that cannot be parsed are
# reported on stderr by their 0-based line index and skipped.
import sys

try:
    # Python 2 only: make utf-8 the implicit str/unicode codec.
    reload(sys)
    sys.setdefaultencoding("utf-8")
except (NameError, AttributeError):
    # Python 3 has no builtin reload()/setdefaultencoding(); nothing to do.
    pass


def parse_user_id(line):
    """Return the user id field of one input line.

    Two layouts are accepted:
      * "<follow_count> <user_id>"  -- the two-field layout this reducer
        originally unpacked;
      * "<_id> <follow_count> <user_id> [<screen_name>...]"  -- a layout with
        the user id in third position; the screen name may be empty or
        contain spaces, hence "three or more fields".
    NOTE(review): confirm which upstream layout is actually fed to this job.

    Raises:
        ValueError: the line has fewer than two fields.
    """
    fields = line.strip().split(" ")
    if len(fields) == 2:
        return fields[1]
    if len(fields) >= 3:
        return fields[2]
    raise ValueError("expected at least 2 fields, got %d" % len(fields))


def count_users(lines, err):
    """Return the number of distinct user ids found in *lines*.

    Unparseable lines are reported to *err* and do not contribute.
    """
    user_set = set()
    for index, line in enumerate(lines):
        try:
            # Deduplicate on the user id, not the follow count, so the
            # total really is a count of users.
            user_set.add(parse_user_id(line))
        except ValueError:
            err.write("[Reduce Error] line:%d\n" % (index))
    return len(user_set)


def main():
    """Run the reducer over the process's standard streams."""
    sys.stdout.write("User Count: %d\n" % (count_users(sys.stdin, sys.stderr)))


if __name__ == "__main__":
    main()