]> code.communitydata.science - social-media-chapter.git/blob - code/prediction/utils.R
initial import of material for public archive into git
[social-media-chapter.git] / code / prediction / utils.R
1
# Use this to check for underpopulated cells.
#
# Counts the number of distinct "eid" values that fall in each cell of c.var.
#
# Args:
#   df:    data frame (or matrix with column names) containing an "eid"
#          column.
#   c.var: grouping factor/vector with one entry per row of df (anything
#          tapply() accepts as INDEX); its values define the cells.
#
# Returns:
#   The tapply() result: the number of unique eids per cell, named by cell.
#   Cells that contain no rows (e.g. unused factor levels) come back as NA.
gen.counts <- function(df, c.var){
    # Extract the column as a plain vector. df[, "eid"] stays a one-column
    # table for data.frame subclasses such as tibbles and data.tables, which
    # tapply() rejects; [[ ]] always yields the vector. Matrices cannot be
    # indexed by name with [[ ]], so keep the original indexing for them.
    eids <- if (is.data.frame(df)) df[["eid"]] else df[, "eid"]
    tapply(eids, c.var, function(x) length(unique(x)))
}
6
# Use this to remove underpopulated cells.
#
# Keeps only the rows of df whose cell (value of c.var) contains strictly
# more than c.min distinct "eid" values, as counted by gen.counts().
#
# Args:
#   df:    data frame containing an "eid" column.
#   c.var: grouping factor/vector with one entry per row of df.
#   c.min: threshold; a cell is kept only if its unique-eid count is
#          greater than (not equal to) c.min.
#
# Returns:
#   df restricted to the rows that belong to sufficiently populated cells.
restrict <- function(df, c.var, c.min){
    var.counts <- gen.counts(df, c.var)
    # Empty cells (e.g. unused factor levels) have an NA count, so the
    # comparison is NA for them. which() drops those; indexing directly with
    # the logical vector would produce an NA name, and %in% would then match
    # rows whose c.var is NA and wrongly keep them.
    keep.cells <- names(var.counts)[which(var.counts > c.min)]
    df[c.var %in% keep.cells, ]
}

Community Data Science Collective || Want to submit a patch?