Facebook - ActoKids/web-crawler GitHub Wiki

The Facebook Crawler scrapes information about all Facebook events created by a Facebook user.

Dependencies

Python - https://www.python.org/downloads/

How it works

The crawler uses the Facebook Graph API with a user access token to scrape data created only by that user. Install Python and pip first. Before you run fbEventCrawler.py, you need a Facebook account and an access token, and you should create some Facebook events on your account for testing.

  1. You need to go to terminal and install the following libraries:
    • pip install facebook-sdk
    • pip3 install requests
  2. You need a Facebook account; if you do not have one, create one. Then:
    • Go to developers.facebook.com and create an account there.
    • Go to link developers.facebook.com/tools/explorer.
    • Go to “My apps” drop down in the top right corner and select “add a new app”. Choose a display name and a category and then “Create App ID”.
    • Go back to developers.facebook.com/tools/explorer. You will see “Graph API Explorer” below “My Apps” in the top right corner. From the “Graph API Explorer” drop down, select your app. Then select “Get Token”. From this drop down, select “Get User Access Token”. Select permissions from the menu that appears and then select “Get Access Token”.
    • Go to link developers.facebook.com/tools/accesstoken. Select “Debug” corresponding to “User Token”. Go to “Extend Token Access”. This will ensure that your token does not expire every two hours.
    • Copy the token and paste it into fbEventCrawler.py where it says "YOUR_ACCESS_TOKEN".
    • Go to your Facebook page and create some events to test the crawling of data.

Finally, run fbEventCrawler.py to see whether the output written to data.txt matches the event info you created on your page.

Example output

 {
	"events": {
		"data": [{
			"attending_count": 1,
			"can_guests_invite": true,
			"cover": {
				"offset_x": 50,
				"offset_y": 50,
				"source": "https://scontent.xx.fbcdn.net/v/t1.0-9/50879295_103587214091040_1721855863594942464_n.jpg?_nc_cat=108&_nc_ht=scontent.xx&oh=cbaabb573b4e74142447f7323564cf7e&oe=5CB49127",
				"id": "103587210757707"
			},
			"declined_count": 0,
			"description": "Cloud test - Customer demo",
			"discount_code_enabled": false,
			"end_time": "2019-03-20T20:00:00-0700",
			"guest_list_enabled": true,
			"interested_count": 0,
			"is_canceled": false,
			"is_draft": false,
			"is_page_owned": false,
			"maybe_count": 0,
			"name": "ActoKids - Sprint 5",
			"noreply_count": 0,
			"owner": {
				"name": "Dao Nguyen",
				"id": "103592304090531"
			},
			"place": {
				"name": "Seattle, WA",
				"location": {
					"city": "Seattle",
					"country": "United States",
					"latitude": 47.6046,
					"longitude": -122.3308,
					"state": "WA"
				},
				"id": "110843418940484"
			},
			"start_time": "2019-03-20T17:00:00-0700",
			"timezone": "America/Los_Angeles",
			"type": "public",
			"updated_time": "2019-01-22T00:25:46+0000",
			"id": "994429640753447"
		}, {
			"attending_count": 1,
			"can_guests_invite": true,
			"cover": {
				"offset_x": 50,
				"offset_y": 50,
				"source": "https://scontent.xx.fbcdn.net/v/t1.0-9/q84/s720x720/49271353_103586090757819_6531866231752884224_o.jpg?_nc_cat=106&_nc_ht=scontent.xx&oh=290d4e3a2548e219a1c41f20f5b50ce6&oe=5D004E73",
				"id": "103586084091153"
			},
			"declined_count": 0,
			"description": "Cloud Test - Sprint 4",
			"discount_code_enabled": false,
			"end_time": "2019-03-06T20:00:00-0800",
			"guest_list_enabled": true,
			"interested_count": 0,
			"is_canceled": false,
			"is_draft": false,
			"is_page_owned": false,
			"maybe_count": 0,
			"name": "ActoKids - Sprint 4",
			"noreply_count": 0,
			"owner": {
				"name": "Dao Nguyen",
				"id": "103592304090531"
			},
			"place": {
				"name": "Tommy Bahama - U Village",
				"location": {
					"city": "Seattle",
					"country": "United States",
					"latitude": 47.661989359326,
					"longitude": -122.29958680996,
					"state": "WA",
					"street": "University Village",
					"zip": "98105"
				},
				"id": "162137857153959"
			},
			"start_time": "2019-03-06T17:00:00-0800",
			"timezone": "America/Los_Angeles",
			"type": "public",
			"updated_time": "2019-01-22T00:24:34+0000",
			"id": "138813353723382"
		}, {
			"attending_count": 1,
			"can_guests_invite": true,
			"cover": {
				"offset_x": 50,
				"offset_y": 50,
				"source": "https://scontent.xx.fbcdn.net/v/t1.0-0/p180x540/50634380_103585557424539_7105767763368476672_o.jpg?_nc_cat=103&_nc_ht=scontent.xx&oh=1ba2ee7fd365ce17c275fa05a8d8577a&oe=5CBBFD28",
				"id": "103585550757873"
			},
			"declined_count": 0,
			"description": "This is for Cloud Practicum test - Sprint 3",
			"discount_code_enabled": false,
			"end_time": "2019-02-20T20:00:00-0800",
			"guest_list_enabled": true,
			"interested_count": 0,
			"is_canceled": false,
			"is_draft": false,
			"is_page_owned": false,
			"maybe_count": 0,
			"name": "ActoKids - Sprint 3",
			"noreply_count": 0,
			"owner": {
				"name": "Dao Nguyen",
				"id": "103592304090531"
			},
			"place": {
				"name": "Everett, WA",
				"location": {
					"city": "Everett",
					"country": "United States",
					"latitude": 47.9792,
					"longitude": -122.2046,
					"state": "WA",
					"zip": "98201, 98203, 98204, 98206, 98207, 98208, 98213"
				},
				"id": "105792912788161"
			},
			"start_time": "2019-02-20T17:00:00-0800",
			"timezone": "America/Los_Angeles",
			"type": "public",
			"updated_time": "2019-01-22T00:23:00+0000",
			"id": "611818705931022"
		}, {
			"attending_count": 1,
			"can_guests_invite": true,
			"cover": {
				"offset_x": 50,
				"offset_y": 50,
				"source": "https://scontent.xx.fbcdn.net/v/t1.0-9/s720x720/50796888_103585074091254_8414767099611709440_n.jpg?_nc_cat=110&_nc_ht=scontent.xx&oh=b80e30e930779b0520bab06811f6f0ac&oe=5CBE6A7D",
				"id": "103585070757921"
			},
			"declined_count": 0,
			"description": "Cloud Practicum - Sprint 2 Test",
			"discount_code_enabled": false,
			"end_time": "2019-02-06T20:00:00-0800",
			"guest_list_enabled": true,
			"interested_count": 0,
			"is_canceled": false,
			"is_draft": false,
			"is_page_owned": false,
			"maybe_count": 0,
			"name": "ActoKids - Sprint2",
			"noreply_count": 0,
			"owner": {
				"name": "Dao Nguyen",
				"id": "103592304090531"
			},
			"place": {
				"name": "Lynwood, Seattle WA",
				"location": {
					"city": "Serene",
					"country": "United States",
					"latitude": 47.861989983402,
					"longitude": -122.29534107272,
					"state": "WA"
				},
				"id": "373797586018999"
			},
			"start_time": "2019-02-06T17:00:00-0800",
			"timezone": "America/Los_Angeles",
			"type": "public",
			"updated_time": "2019-01-22T00:21:34+0000",
			"id": "293423794690932"
		}, {
			"attending_count": 1,
			"can_guests_invite": true,
			"cover": {
				"offset_x": 50,
				"offset_y": 50,
				"source": "https://scontent.xx.fbcdn.net/v/t1.0-9/50407301_103581844091577_913716947483361280_n.jpg?_nc_cat=104&_nc_ht=scontent.xx&oh=d0e30582f7e7c12777368d2b213caa5e&oe=5CB910AB",
				"id": "103581840758244"
			},
			"declined_count": 0,
			"description": "Cloud Practicum 2019 - Sprint 1",
			"discount_code_enabled": false,
			"end_time": "2019-01-23T20:00:00-0800",
			"guest_list_enabled": true,
			"interested_count": 0,
			"is_canceled": false,
			"is_draft": false,
			"is_page_owned": false,
			"maybe_count": 0,
			"name": "ActoKids - Sprint1 Test",
			"noreply_count": 0,
			"owner": {
				"name": "Dao Nguyen",
				"id": "103592304090531"
			},
			"place": {
				"name": "Seattle, WA",
				"location": {
					"city": "Seattle",
					"country": "United States",
					"latitude": 47.6046,
					"longitude": -122.3308,
					"state": "WA"
				},
				"id": "110843418940484"
			},
			"start_time": "2019-01-23T17:00:00-0800",
			"timezone": "America/Los_Angeles",
			"type": "public",
			"updated_time": "2019-01-22T00:19:23+0000",
			"id": "314206125967993"
		}],
		"paging": {
			"cursors": {
				"before": "QVFIUjh3X3dZAeEpUSmZAGLUxFQzdrMU1WR2JIRVIwaFpmbFphd0RteDQ1cWNRT1hMOV9kMjVkX013WW1OZAXNUNWpTUnNrcFRTRkx1M2ZAPVy1XeDFjRngyN1lR",
				"after": "QVFIUkNtVWJDMF9PNDJZAWnJQdm4ycVRheHJfWTVCSzZAaUWFWbkZAsZAU4wMFZAqMVEyMVg4SXpRR0k4aWdSb0xyX0N0bnBuRjVZAakhMMUV3OXFSZAGZARcGRFeUxn"
			}
		}
	},
	"id": "103592304090531"
}