Data analysis - strohne/Facepager GitHub Wiki
Data analysis is outside the scope of Facepager, but you can open the exported files with Excel, SPSS, Stata, R, or any other analysis program you like.
We recommend R and RStudio for data wrangling. The following scripts may help you to read and prepare the data.
Sometimes it is easier to work with the different levels side by side (wide format) instead of one below the other (long format). The following R-Script converts the data from long to wide format. Install R and RStudio, open the script, adjust the filenames and run the script.
#
# This script converts csv files of Facepager
# from long to wide format by dissolving the
# hierarchy. It takes the lowest level
# (e.g. comments) and joins all levels
# above (e.g. posts, pages, seed) as columns
# instead of rows.
#
# Adjust the filenames in the last lines!
#
#
# Libraries ----
#
library(tidyverse)
library(janitor)
#
# Functions ----
#
# Transform to wide format
hierarchy_to_wide <- function(data.long) {
  # Dissolve the Facepager hierarchy: starting from the deepest level
  # (e.g. comments), left-join every level above (e.g. posts, pages, seed)
  # as additional columns instead of additional rows.
  #
  # data.long: data frame with at least `level`, `id` and `parent_id` columns
  #            (the column layout of a Facepager csv export).
  # Returns a data frame with one row per deepest-level node and all
  # columns prefixed "level<k>.".
  maxlevel <- max(data.long$level)
  for (currentlevel in maxlevel:0) {
    # Rows of the current level, all columns prefixed with the level number.
    # Note: funs() is defunct in dplyr >= 1.0; rename_with() is the
    # current replacement.
    data.level <- data.long %>%
      filter(level == currentlevel) %>%
      rename_with(~ paste0("level", currentlevel, ".", .x))
    if (currentlevel == maxlevel) {
      # Start with the lowest (deepest) level
      data.wide <- data.level
    } else {
      # Join the parent level: the child's parent_id matches the parent's id
      joinby <- paste0("level", currentlevel, ".id")
      names(joinby) <- paste0("level", currentlevel + 1, ".parent_id")
      data.wide <- data.wide %>%
        left_join(data.level, by = joinby)
    }
  }
  # Locals vanish with the function frame, so no rm() is needed (the
  # original rm() also warned when maxlevel == 0 and joinby never existed).
  data.wide
}
# Remove empty columns
clean_columns <- function(data) {
  # Treat empty strings as NA in all character columns, then drop columns
  # that are entirely empty.
  # mutate_if() is superseded; across(where(...)) is the current idiom.
  data %>%
    mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
    remove_empty("cols")
}
#
# Convert file ----
#
# Read the Facepager export (semicolon as separator, "None" marks NA)
data.long <- read_csv2("input.csv", na = "None")
# Dissolve the hierarchy and drop empty columns
data.wide <- data.long %>%
  hierarchy_to_wide() %>%
  clean_columns()
# Write the wide-format result (semicolon-separated, Excel-friendly)
write_excel_csv2(data.wide, "output.wide.csv", na = "")
You can work with the database file (an SQLite database) instead of exporting to csv. This way you can get all data without having to care about the column setup. The following script opens the file and converts the response from JSON to columns of a data frame.
#
# This script opens a database file of Facepager
# and extracts all data into a data_frame. Adjust
# the filename in the last line.
#
#
# Libraries ----
#
library(tidyverse)
library(RSQLite)
library(jsonlite)
#
# Functions ----
#
# Function to read JSON from text
# Parse a character vector of JSON strings (one document per element)
from_ndjson <- function(data) {
  # stream_in() cannot handle missing entries; substitute an empty
  # JSON object so every element parses cleanly.
  json_lines <- ifelse(is.na(data), "{}", data)
  jsonlite::stream_in(textConnection(json_lines))
}
# Function to convert JSON in the response to columns of a data frame
parse_response_data <- function (nodes) {
responses = from_ndjson(nodes$response)
responses = jsonlite::flatten(responses,recursive = T)
colnames(responses) = paste0("response.",colnames(responses))
nodes = bind_cols(select(nodes,-response),responses)
rm(responses)
nodes
}
# Function to load data from the database
load_nodes <- function(dbname) {
db.con = dbConnect(RSQLite::SQLite(), dbname=dbname,flags=SQLITE_RO)
statement = paste0('select * from Nodes')
db.nodes = dbGetQuery( db.con,statement )
dbDisconnect(db.con)
as_tibble(db.nodes)
}
#
# Load data ----
#
# Read all nodes from the database, then expand the JSON responses
# into columns of the data frame
data <- load_nodes("input.db")
data <- parse_response_data(data)
The following code shows how to prepare a like network for visualization with Gephi. First, you need to fetch and export the likes of a page, the likes of those likes, and so on. Second, you create a node list and an edge list with the following script.
#
# Load packages and data ----
#
library(tidyverse)
# Facepager export: semicolon as separator, "None" marks missing values
likes <- read_csv2("pirates-likes.csv", na = "None")
#
# Prepare data ----
#
# Keep only data rows and the columns needed to build the network
likes <- likes %>%
  filter(object_type == "data") %>%
  select(id, parent_id, object_id, name, level)
# Create edge list:
# - attach each row's parent via a self join on parent_id = id
# - the parent's object_id (suffix .y) becomes Source, the child's
#   (suffix .x) becomes Target
# - drop rows without a parent and remove duplicate edges
likes.edges <- likes %>%
  left_join(likes, by = c("parent_id" = "id")) %>%
  select(Source = object_id.y, Target = object_id.x) %>%
  na.omit() %>%
  distinct()
# Create node list:
# - Gephi expects Id and Label columns
# - remove duplicate nodes
likes.nodes <- likes %>%
  select(Id = object_id, Label = name) %>%
  distinct()
#
# Save for Gephi ----
#
write_csv(likes.edges, "pirates-likes-edges.csv", na = "")
write_csv(likes.nodes, "pirates-likes-nodes.csv", na = "")