mapreduce - luckystar1992/ERM GitHub Wiki

# -*- coding: utf-8 -*-
"""Streaming mapper: flatten crawled JSON records into space-separated lines.

Reads one JSON document per line from stdin and, for each, writes
``<_id.$oid> <user.follow_count> <user.id> <user.screen_name>`` to stdout.
Lines that cannot be converted are reported on stderr and skipped.
"""

import json
import sys

try:
    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")
except (NameError, AttributeError):
    # Python 3: ``reload`` is not a builtin, so there is nothing to do.
    pass

try:
    # Python 2: json yields ``unicode`` strings and may yield ``long`` ints,
    # neither of which is matched by a plain ``int``/``str`` check.
    _SCALAR_ID_TYPES = (int, long, basestring)
except NameError:
    _SCALAR_ID_TYPES = (int, str)


def _extract_user_id(raw_id):
    """Return the user id as an int.

    ``raw_id`` is either a scalar (int or numeric string) or a dict wrapping
    the value under the ``"$numberLong"`` key.

    Raises:
        ValueError: if the id is missing, of an unsupported type, or not
            numeric. Raising here (instead of falling through) guarantees a
            record never inherits the id of the previously processed line.
    """
    if isinstance(raw_id, dict):
        raw_id = raw_id.get("$numberLong")
    if isinstance(raw_id, _SCALAR_ID_TYPES):
        return int(raw_id)
    raise ValueError("unsupported user id: %r" % (raw_id,))


def format_record(line):
    """Convert one JSON input line into one mapper output line.

    Args:
        line: a single JSON document as text; surrounding whitespace is
            ignored.

    Returns:
        ``"<oid> <follow_count> <user_id> <screen_name>\\n"``.

    Raises:
        ValueError: invalid JSON or an unusable user id.
        AttributeError: the ``_id`` or ``user`` object is missing.
        TypeError: ``follow_count`` is not a number.
    """
    record = json.loads(line.strip())

    # Crawled document id.
    oid = record.get("_id").get("$oid")

    # User information.
    user = record.get("user")
    follow_count = user.get("follow_count")
    user_id = _extract_user_id(user.get("id"))
    screen_name = user.get("screen_name")

    return "%s %d %d %s\n" % (oid, follow_count, user_id, screen_name)


def run_mapper(stdin=None, stdout=None, stderr=None):
    """Map every line of ``stdin`` to ``stdout``, logging failures.

    The streams default to the process-level ``sys.stdin``/``sys.stdout``/
    ``sys.stderr``. A failing line is reported by its zero-based index on
    ``stderr`` and skipped, so one bad record never aborts the job.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr

    for index, line in enumerate(stdin):
        try:
            # The write stays inside the try so an un-encodable record is
            # logged and skipped like any other bad line.
            stdout.write(format_record(line))
        except Exception:
            stderr.write("[Map Error] line:%d\n" % (index))


if __name__ == "__main__":
    run_mapper()

# -*- coding: utf-8 -*-
"""Streaming reducer: count the distinct users seen in the mapper output.

Reads space-separated lines from stdin and prints ``User Count: <n>`` where
``n`` is the number of distinct user ids. Malformed lines are reported on
stderr and skipped.
"""

import sys

try:
    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")
except (NameError, AttributeError):
    # Python 3: ``reload`` is not a builtin, so there is nothing to do.
    pass


def parse_user_id(line):
    """Return the user id field of one input line, as text.

    Two layouts are accepted:

    * ``<_id> <follow_count> <user_id> <screen_name>`` -- what the mapper
      emits; the screen name may be empty or contain spaces, so at most
      three splits are performed.
    * ``<follow_count> <user_id>`` -- the two-field form the reducer
      originally unpacked.

    Raises:
        ValueError: if the line has fewer than two fields or the user id
            field is empty.
    """
    fields = line.strip().split(" ", 3)
    if len(fields) == 2:
        user_id = fields[1]
    elif len(fields) >= 3:
        user_id = fields[2]
    else:
        raise ValueError("expected at least 2 fields, got %d" % len(fields))
    if not user_id:
        raise ValueError("empty user id")
    return user_id


def count_users(lines, stderr=None):
    """Return the number of distinct user ids found in ``lines``.

    Args:
        lines: iterable of input lines.
        stderr: stream for error reports; defaults to ``sys.stderr``.

    Each unparsable line is reported by its zero-based index and ignored.
    """
    stderr = sys.stderr if stderr is None else stderr
    user_ids = set()
    for index, line in enumerate(lines):
        try:
            # Key on the user id: the report is a *user* count, so keying on
            # follow_count would merge users sharing the same count.
            user_ids.add(parse_user_id(line))
        except ValueError:
            stderr.write("[Reduce Error] line:%d\n" % (index))
    return len(user_ids)


def run_reducer():
    """Reduce stdin and write the distinct-user total to stdout."""
    sys.stdout.write("User Count: %d\n" % (count_users(sys.stdin)))


if __name__ == "__main__":
    run_reducer()