Tag: rstats

A/B testing and Statistics at Etsy, by Emily Robinson


Generating numbers is easy; generating numbers you should trust is hard!

Emily Robinson is a data scientist at Etsy, an e-commerce website for handmade and vintage products. In the #rstats community, Emily is nearly as famous as her brother David Robinson, whom we know from the tidytext R-package.

Like any large tech company, Etsy relies heavily on statistics to improve its way of doing business. In their case, data from real-life experiments provide the business intelligence that allows effective decision-making. For instance, they experiment with the layout of their buttons, with the text shown near products, or with the suggestions made after a search query. To detect whether such changes have (ever so) small effects on Etsy's KPIs (e.g., conversion), data scientists such as Emily rely on traditional A/B testing.

In a 40-minute presentation, Emily explains how statistical issues such as skewed distributions, outliers, and power are dealt with at Etsy, using, among other techniques, bootstrapping and simulations. Moreover, about 30 minutes in, Emily shares her lessons on working with (less stats-savvy) business stakeholders: for instance, how to translate business questions into data questions and the answers back into business solutions, or how to deal with the urge to peek at experiment results early.
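
To give a flavour of what such a bootstrap looks like in R, here is my own minimal sketch with made-up conversion rates (not Etsy's actual code or data):

# Bootstrapping the difference in conversion between two experiment groups
# (hypothetical data; a sketch of the general technique, not Etsy's code)
set.seed(123)
ab_data <- data.frame(
  group     = rep(c("A", "B"), each = 10000),
  converted = c(rbinom(10000, 1, 0.030), rbinom(10000, 1, 0.032))
)

boot_diff <- replicate(2000, {
  resample <- ab_data[sample(nrow(ab_data), replace = TRUE), ]
  rates <- tapply(resample$converted, resample$group, mean)
  rates["B"] - rates["A"]
})

quantile(boot_diff, c(0.025, 0.975))  # bootstrap 95% interval for the lift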

Overall, I can recommend the presentation below, the slides of which you can find on Emily's GitHub.

 

New to R? Kickstart your learning and career with these 6 steps!


For newcomers, R code can look like ancient Egyptian hieroglyphs with its weird operators (%in%, <-, ||, or %/%). The R language is said to have a steep learning curve, and although there are many introductory courses and books (see R Resources), it's hard to decide where to start.
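
As a small taster, here is what those four operators actually do (the comments show the results):

x <- c(2, 4, 6)   # <-   assigns the vector c(2, 4, 6) to x
3 %in% x          # %in% tests membership: FALSE
4 %in% x          #                        TRUE
7 %/% 2           # %/%  is integer division: 3
TRUE || FALSE     # ||   is the (scalar) logical OR: TRUE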

Fortunately, I am here to help! Below is a six-step guide to learning R, using only open-access (i.e., free!) materials.

Although aimed at complete newcomers, it will have you writing your own practical scripts and programs in no time: just start at #1 and work your way up to coding mastery!

If you already feel comfortable with the basics of R — or don’t like basics — you can start at #5 and jump into practical learning via the tidyverse.

Good luck!!!

Step 1: An R Folder (15 min)

Create a directory for your R learning stuff somewhere on your computer. Download this (very) short introduction to R by Paul Torfs and Claudia Bauer and store it in that folder. Now read the introduction and follow the steps. It will help you install all R software on your own computer and familiarize you with the standard data types.

Step 2: Handy Cheat Sheets (15 min)

Many standard functions exist in R, and after a while you will know them by heart. For now, it's good to have a dictionary or reference close at hand. Download and read the cheat sheets for base R (Mhairi McNeill) and R base functions (Tom Short). Because you'll be writing most of your R scripts in RStudio, it's also recommended to keep an RStudio cheat sheet and an RStudio keyboard shortcuts cheat sheet at hand.

Step 3: swirl Away in RStudio (8h)

Now you’re ready to really start learning and we’re going to accelerate via swirl. Open up your RStudio and enter the two lines of code below in your console window.

install.packages('swirl') #download swirl package 
library(swirl) #load in swirl package

swirl (webpage) will start automatically, and after a couple of prompts you will be able to choose the learning course called 1: R Programming: The basics of programming in R (see below). This course consists of 15 modules through which you will master the basics of R in the R environment itself. Start with module 1 and complete one to three modules per day, so that you finish the swirl course in about a week.

Starting up swirl in RStudio.
swirl's four R learning courses, and the 15 modules belonging to the R Programming course.

Step 4: A Pirate’s Guide to R (10h)

OK, you should now be familiar with the basics of R. However, knowledge is crystallized through repetition. I therefore suggest you walk through the book YaRrr! The Pirate's Guide to R (Phillips, 2017), starting in chapter 3. It's a fun book and will teach you how to program custom functions and loops, as well as some basic statistical modelling techniques (the thing R was actually designed for).
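
To give you a feel for what that covers, here is a small self-contained example of my own (not from the book) combining a custom function, a loop, and a simple linear model on R's built-in mtcars data:

# A custom function: convert miles per gallon to kilometres per litre
mpg_to_kmpl <- function(mpg) mpg * 0.425144

# A loop: print the conversion for a few values
for (m in c(15, 25, 35)) {
  cat(m, "mpg is about", round(mpg_to_kmpl(m), 1), "km/l\n")
}

# Basic statistical modelling: regress fuel efficiency on car weight
model <- lm(mpg ~ wt, data = mtcars)
summary(model)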

Step 5: R for Data Science (16h)

By now, you might say you are an adept R programmer with some statistical modelling experience. However, you have mostly been working with base R functions, knowledge of which is a must to really understand the language. In practice, though, R programmers rely heavily on contributed packages. A very useful group of packages is commonly referred to as the tidyverse, and you will be amazed at how much this set of packages simplifies working in R. The next step, therefore, is to work through the book R for Data Science (Grolemund & Wickham, 2017) (hardcopy here).
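
To give you a small taste of why the tidyverse is worth the effort, here is a minimal dplyr/ggplot2 pipeline of my own on the built-in mtcars data (the book goes much deeper):

library(dplyr)
library(ggplot2)

# Summarise fuel efficiency per number of cylinders
mtcars %>%
  group_by(cyl) %>%
  summarise(mean_mpg = mean(mpg), n_cars = n())

# Plot weight against fuel efficiency, coloured by number of cylinders
ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  labs(x = "Weight (1000 lbs)", y = "Miles per gallon", colour = "Cylinders")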

Step 6: Specialize (∞)

You are now several steps and a couple of weeks further. You possess basic knowledge of the R language, know how to write scripts in RStudio, are capable of programming in base R as well as using the advanced functionality of the tidyverse, and you have even made a start with some basic statistical modelling.

It's time to set you loose in the wonderful world of the R community. If you have not done so already, get accounts on Stack Overflow and Cross Validated. You might also want to subscribe to the R Help Mailing List, R-Bloggers, and, obviously, my website.


On Twitter, have a look at #rstats and, on Reddit, subscribe to the rstats, rstudio, and statistics subreddits. From here on, I can only advise you to return to the R Resources Overview and continue broadening your R programming skills. Pick materials in whatever area interests you.

Beer-in-hand Data Science


Obviously, analysing beer data is high on everybody's list of favourite things to do on a weekend. Amanda Dobbyn wanted to examine whether she could provide us with an informative categorization of the 45,000+ beers in her data set, without having to taste them all herself.

You can find the full report here but you may also like to interactively discover beer similarities yourself in Amanda’s Beer Clustering Shiny App. Or just have a quick look at some of Amanda’s wonderful visualizations below.

A density map of the bitterness (y-axis) and alcohol percentages (x-axis) in the most popular beer styles.

A k-means clustering of the 45,000 beers into 10 clusters. Try out other settings in Amanda's Beer Clustering Shiny App.

The alcohol percentages (x), bitterness (y) and cluster assignments of some popular beer styles.

 

Modelling beer bitterness (y) by the number of hops used (x).
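
If you would like to try something similar yourself, a clustering like Amanda's boils down to a few lines of R. Below is a minimal sketch on made-up data with hypothetical abv and ibu columns (not Amanda's actual code; see her report for the real analysis):

# k-means on hypothetical beer data (abv = alcohol %, ibu = bitterness)
set.seed(1234)
beers <- data.frame(
  abv = runif(500, 3, 12),
  ibu = runif(500, 5, 120)
)

scaled <- scale(beers[, c("abv", "ibu")])  # put both variables on a common scale
clusters <- kmeans(scaled, centers = 10)   # 10 clusters, as in the Shiny app

beers$cluster <- factor(clusters$cluster)
table(beers$cluster)                       # how many beers end up in each cluster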

 

Networks Among #rstats Twitterers


Reposted from Kasia Kulma’s github with minor modifications.

Have you ever wondered whether the most active/popular R twitterers are virtual friends? 🙂 And by friends here I simply mean mutual followers on Twitter. In this post, I score and pick the top 30 #rstats Twitter users and analyse their Twitter network. You'll see a lot of applications of the rtweet and ggraph packages, as well as a very useful twist using the purrr library, so let's begin!

 

IMPORTING #RSTATS USERS

After loading my precious packages…

library(rtweet)
library(dplyr)
library(purrr)
library(igraph)
library(ggraph)

… I searched for Twitter users that have the term rstats in their profile description. This definitely doesn't include ALL active and popular R users, but it's a pretty reliable way of picking out R fans.

r_users <- search_users("#rstats", n = 1000)

It's important to say that with rtweet::search_users(), even if you specify 1000 users to be extracted, you end up with quite a few duplicates; the actual number of unique users I got was much smaller: 564.

r_users %>% summarise(n_users = n_distinct(screen_name))
##   n_users
## 1     564

Funnily enough, even though my profile description contains #rstats, I (@KKulma) was not included in the search results! Were you? 🙂

SCORING AND CHOOSING TOP #RSTATS USERS

Now, let’s extract some useful information about those users:

r_users_info <- lookup_users(r_users$screen_name)

You'll notice that the resulting data frame holds information about the number of followers, friends (users they follow), lists they belong to, the number of tweets (statuses), and how many times they were marked as a favourite.

r_users_info %>% select(dplyr::contains("count")) %>% head()
##   followers_count friends_count listed_count favourites_count
## 1            8311           366          580             9325
## 2           44474            11         1298                3
## 3           11106           524          467            18495
## 4           12481           431          542             7222
## 5           15345          1872          680            27971
## 6            5122           700          549             2796
##   statuses_count
## 1          66117
## 2           1700
## 3           8853
## 4           6388
## 5          22194
## 6          10010

And these are the variables I used for building my 'top score': I simply calculate a percentile for each of those variables and sum them for each user. Given that each variable's percentile gives a value between 0 and 1, the final score can have a maximum value of 5.

r_users_ranking <- r_users_info %>%
  filter(protected == FALSE) %>% 
  select(screen_name, dplyr::contains("count")) %>% 
  unique() %>% 
  mutate(followers_percentile = ecdf(followers_count)(followers_count),
         friends_percentile = ecdf(friends_count)(friends_count),
         listed_percentile = ecdf(listed_count)(listed_count),
         favourites_percentile = ecdf(favourites_count)(favourites_count),
         statuses_percentile = ecdf(statuses_count)(statuses_count)
         ) %>% 
  group_by(screen_name) %>% 
  summarise(top_score = followers_percentile + friends_percentile + listed_percentile + favourites_percentile + statuses_percentile) %>% 
  ungroup() %>% 
  mutate(ranking = rank(-top_score))

Finally, I picked the top 30 users based on the score I calculated. Ta-da!

top_30 <- r_users_ranking %>% arrange(desc(top_score)) %>% head(30)
top_30 
## # A tibble: 30 x 3
##        screen_name top_score ranking
##              <chr>     <dbl>   <dbl>
##  1          hspter  4.877005       1
##  2    RallidaeRule  4.839572       2
##  3         DEJPett  4.771836       3
##  4 modernscientist  4.752228       4
##  5 nicoleradziwill  4.700535       5
##  6      tomhouslay  4.684492       6
##  7    ChetanChawla  4.639929       7
##  8   TheSmartJokes  4.627451       8
##  9   Physical_Prep  4.625668       9
## 10       Cataranea  4.602496      10
## # ... with 20 more rows

I must say I'm incredibly impressed by these scores: @hspter, THE top R twitterer, managed to obtain a score of nearly 4.9 out of 5! WOW!

Anyway! To add some more depth to my list, I tried to identify the top users' gender, to see how many of them are women. I had to do it manually (ahem!), as the Twitter API doesn't provide this information, AFAIK. Let me know if you spot any mistakes!

top30_lookup <- r_users_info %>%
  filter(screen_name %in% top_30$screen_name) %>% 
  select(screen_name, user_id)

top30_lookup$gender <- c("M", "F", "F", "F", "F",
                         "M", "M", "M", "F", "F", 
                         "F", "M", "M", "M", "F", 
                         "F", "M", "M", "M", "M", 
                         "M", "M", "M", "F", "M",
                         "M", "M", "M", "M", "M")

table(top30_lookup$gender)
## 
##  F  M 
## 10 20

It looks like a third of all top users are women, but in the top 10 users, there are 6 women. Better than I expected, to be honest. So, well done, ladies!

GETTING FRIENDS NETWORK

Now, this was the trickiest part of the project: extracting the top users' friends lists and putting them all in one data frame. As you may be aware, the Twitter API only lets you request friend lists for 15 accounts per 15 minutes. So for my list of 30, I had to break the job into two batches of 15 users each, and I then named each list according to the top user it refers to:

top_30_usernames <- top30_lookup$screen_name

friends_top30a <- map(top_30_usernames[1:15], get_friends)
names(friends_top30a) <- top_30_usernames[1:15]

# 15 minutes later....
friends_top30b <- map(top_30_usernames[16:30], get_friends)

After this I end up with two lists, each containing all friends' IDs for the first and second 15 top users respectively. So what I need to do now is i) append the two lists, ii) create a variable stating the top user's name in each of those lists, and iii) turn the lists into data frames. All this can be done in a few lines of code. And brace yourself: here comes the purrr trick I've been going on about! Simply using purrr::map2_df I can take a single list of lists, create a name variable in each of those lists based on the list name (twitter_top_user), and convert the result into a data frame. BRILLIANT!!

# turning lists into data frames and putting them together
friends_top30 <- append(friends_top30a, friends_top30b)
names(friends_top30) <- top_30_usernames

# purrr - trick I've been banging on about!
friends_top <- map2_df(friends_top30, names(friends_top30), ~ mutate(.x, twitter_top_user = .y)) %>% 
  rename(friend_id = user_id) %>% select(twitter_top_user, friend_id)

Here's the last bit that I need to correct before we move on to plotting the friends networks: using purrr::map() with rtweet::get_friends() gives me at most 5000 friends per user (the Twitter API returns friend IDs in pages of up to 5000), but in the case of @TheSmartJokes the true value is over 8000. As it's the only top user with more than 5000 friends, I'll download his friends separately…

# getting a full list of friends
SJ1 <- get_friends("TheSmartJokes")
SJ2 <- get_friends("TheSmartJokes", page = next_cursor(SJ1))

# putting the data frames together 
SJ_friends <- rbind(SJ1, SJ2) %>%  
  rename(friend_id = user_id) %>% 
  mutate(twitter_top_user = "TheSmartJokes") %>% 
  select(twitter_top_user, friend_id)

# the final results - over 8000 friends, rather than 5000
str(SJ_friends) 
## 'data.frame':    8611 obs. of  2 variables:
##  $ twitter_top_user: chr  "TheSmartJokes" "TheSmartJokes" "TheSmartJokes" "TheSmartJokes" ...
##  $ friend_id       : chr  "390877754" "6085962" "88540151" "108186743" ...

… and use it to replace his entries in the final friends list.

friends_top30 <- friends_top %>% 
  filter(twitter_top_user != "TheSmartJokes") %>% 
  rbind(SJ_friends) 

Finally, let me do some last data cleaning: filtering out friends that are not among the top 30 R users, replacing their IDs with Twitter names, and adding gender for the top users and their friends… Tam, tam, tam: here we are! Here's the final data frame we'll use for visualising the friend networks!

# select friends that are top30 users
final_friends_top30 <- friends_top30  %>% 
  filter(friend_id %in% top30_lookup$user_id)

# add friends' screen_name
final_friends_top30$friend_name <- top30_lookup$screen_name[match(final_friends_top30$friend_id, top30_lookup$user_id)]

# add users' and friends' gender
final_friends_top30$user_gender <- top30_lookup$gender[match(final_friends_top30$twitter_top_user, top30_lookup$screen_name)]
final_friends_top30$friend_gender <- top30_lookup$gender[match(final_friends_top30$friend_name, top30_lookup$screen_name)]

## final product!!!
final <- final_friends_top30 %>% select(-friend_id)

head(final)
##   twitter_top_user     friend_name user_gender friend_gender
## 1         hrbrmstr nicoleradziwill           M             F
## 2         hrbrmstr        kara_woo           M             F
## 3         hrbrmstr      juliasilge           M             F
## 4         hrbrmstr        noamross           M             M
## 5         hrbrmstr      JennyBryan           M             F
## 6         hrbrmstr     thosjleeper           M             M

VISUALIZING FRIENDS NETWORKS

After turning our data frame into something more usable by igraph and ggraph

f1 <- graph_from_data_frame(final, directed = TRUE, vertices = NULL)
V(f1)$Popularity <- degree(f1, mode = 'in')

… let’s have a quick overview of all the connections:

ggraph(f1, layout='kk') + 
  geom_edge_fan(aes(alpha = ..index..), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity)) +
  theme_graph( fg_text_colour = 'black') 

The network of all connections among the top 30 #rstats users.

Keep in mind that Popularity – defined as the number of edges that go into the node – determines node size. It’s all very pretty, but I’d like to see how nodes correspond to Twitter users’ names:

ggraph(f1, layout='kk') + 
  geom_edge_fan(aes(alpha = ..index..), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity)) +
  geom_node_text(aes(label = name, fontface='bold'), 
                 color = 'white', size = 3) +
  theme_graph(background = 'dimgray', text_colour = 'white',title_size = 30) 

The same network, now with each node labelled with the user's Twitter name.

So interesting! You can see the core of the graph consists mainly of female users: @hspter, @JennyBryan, @juliasilge, @kara_woo, but also a couple of male R users: @hrbrmstr and @noamross. Who do they follow? Men or women?

ggraph(f1, layout='kk') + 
  geom_edge_fan(aes(alpha = ..index..), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity)) +
  theme_graph( fg_text_colour = 'black') +
  geom_edge_link(aes(colour = friend_gender)) +
  scale_edge_color_brewer(palette = 'Set1') + 
  labs(title='Top 30 #rstats users and gender of their friends')

The same network, with edge colour indicating the gender of the friend being followed.

It's difficult to say definitively, but superficially I see A LOT of red, suggesting that our top R users often follow female top twitterers. Let's have a closer look, split the graph by top user gender, and see if there's any difference in the gender of the users they follow:

ggraph(f1, layout='kk') + 
  geom_edge_fan(aes(alpha = ..index..), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity)) +
  theme_graph( fg_text_colour = 'black') +
  facet_edges(~user_gender) +
  geom_edge_link(aes(colour = friend_gender)) +
  scale_edge_color_brewer(palette = 'Set1') +
  labs(title='Top 30 #rstats users and gender of their friends', subtitle='Graphs are separated by top user gender, edge colour indicates their friend gender' )

The network split by top user gender, with edge colour indicating the friend's gender.

Ha! Look at this! Obviously, the female users' graph is less dense, as there are fewer of them in the dataset; however, you can see that they tend to follow male users more often than male top users do. Is that impression supported by the raw numbers?

final %>% 
  group_by(user_gender, friend_gender) %>% 
  summarize(n = n()) %>% 
  group_by(user_gender) %>% 
  mutate(sum = sum(n),
         percent = round(n/sum, 2)) 
## # A tibble: 4 x 5
## # Groups:   user_gender [2]
##   user_gender friend_gender     n   sum percent
##         <chr>         <chr> <int> <int>   <dbl>
## 1           F             F    26    57    0.46
## 2           F             M    31    57    0.54
## 3           M             F    55   101    0.54
## 4           M             M    46   101    0.46

It seems so, although to a lesser extent than the network graphs suggested: female top users follow other female top users 46% of the time, whereas male top users follow female top users 54% of the time. So what do you have to say about that?

About the author:

Kasia Kulma describes herself as an all-round science enthusiast: formally a doctor in evolutionary biology, professionally a data scientist, and privately a soppy mum and outdoors lover.