DC Intermediate R - Utilities

# DC Intermediate R
DC Intermediate R - Conditionals and Control Flow
DC Intermediate R - Loops
DC Intermediate R - Functions
DC Intermediate R - The apply family
DC Intermediate R - Utilities

1 Useful Functions[ | ]

2 Mathematical utilities[ | ]

# Signed error values supplied by the exercise
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)

# Drop the signs first, then round each magnitude to a whole number and
# add them up. (9.5 rounds to 10 here, which is how the total reaches 29.)
abs_errors <- abs(errors)
sum(round(abs_errors))
## [1] 29

3 Find the error[ | ]

# Exercise inputs (leave as given): vec2 is vec1 in reverse order
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(vec1)

# The broken call was:
#   mean(abs(vec1), abs(vec2))
# mean() averages only its first argument; the second positional argument
# is `trim`, so the two vectors have to be combined into one before averaging.

mean(abs(c(vec1, vec2)))
## [1] 4.48

4 Data Utilities[ | ]

# Profile-view counts, provided by the exercise as lists
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Flatten each list into a plain numeric vector: li_vec and fb_vec
# (unlist() collapses a possibly nested list into a single vector)
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Join the two vectors end to end, LinkedIn values first: social_vec
social_vec <- c(li_vec, fb_vec)

# Show the combined values from largest to smallest
rev(sort(social_vec))
##  [1] 17 17 16 16 14 14 13 13  9  8  7  5  5  2

5 Find the error (2)[ | ]

# Fixed call 1: flatten the list to a numeric vector, total it, then round
list_values <- unlist(list(1.1, 3, 5))
round(sum(list_values))
## [1] 9

# Fixed call 2: the odd numbers 1, 3, 5, 7 repeated as a whole seven times
odd_numbers <- seq(from = 1, to = 7, by = 2)
rep(odd_numbers, times = 7)
##  [1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7

# Same vector again, this time printed explicitly
print(rep(odd_numbers, times = 7))
##  [1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7

6 Beat Gauss using R[ | ]

# seq1: count upward from 1 in steps of 3, stopping before exceeding 500
seq1 <- seq(from = 1, to = 500, by = 3)
print(seq1)
##   [1]   1   4   7  10  13  16  19  22  25  28  31  34  37  40  43  46  49
##  [18]  52  55  58  61  64  67  70  73  76  79  82  85  88  91  94  97 100
##  [35] 103 106 109 112 115 118 121 124 127 130 133 136 139 142 145 148 151
##  [52] 154 157 160 163 166 169 172 175 178 181 184 187 190 193 196 199 202
##  [69] 205 208 211 214 217 220 223 226 229 232 235 238 241 244 247 250 253
##  [86] 256 259 262 265 268 271 274 277 280 283 286 289 292 295 298 301 304
## [103] 307 310 313 316 319 322 325 328 331 334 337 340 343 346 349 352 355
## [120] 358 361 364 367 370 373 376 379 382 385 388 391 394 397 400 403 406
## [137] 409 412 415 418 421 424 427 430 433 436 439 442 445 448 451 454 457
## [154] 460 463 466 469 472 475 478 481 484 487 490 493 496 499

# seq2: count downward from 1200 in steps of 7, stopping before passing 900
seq2 <- seq(from = 1200, to = 900, by = -7)
print(seq2)
##  [1] 1200 1193 1186 1179 1172 1165 1158 1151 1144 1137 1130 1123 1116 1109
## [15] 1102 1095 1088 1081 1074 1067 1060 1053 1046 1039 1032 1025 1018 1011
## [29] 1004  997  990  983  976  969  962  955  948  941  934  927  920  913
## [43]  906

# Grand total over both sequences (sum() accepts several vectors at once)
print(sum(seq1, seq2))
## [1] 87029

7 Regular Expressions[ | ]

8 grepl & grep[ | ]

# Sample addresses for the exercise; note that "invalid.edu" has no "@"
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# grepl() returns one logical per element: does it contain "edu" anywhere?
print(grepl("edu", emails))
## [1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE

# grep() returns the positions of the matching elements instead: hits
hits <- grep("edu", emails)
hits
## [1] 1 2 4 5

# Pick out the matching addresses by position
# ("education@world.gov" matches too, because "edu" occurs in its local part)
print(emails[hits])
## [1] "john.doe@ivyleague.edu"   "education@world.gov"     
## [3] "invalid.edu"              "quant@bigdatacollege.edu"

9 grepl & grep (2)[ | ]

# Sample addresses for the exercise
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# Stricter pattern: an "@", then anything, then a literal ".edu" at the very
# end of the string. The doubled backslash escapes the dot for the regex.
edu_pattern <- "@.*\\.edu$"

# Positions of the addresses that really are .edu addresses
print(grep(edu_pattern, emails))
## [1] 1 5

# The same match as a logical mask, one value per address: hits
hits <- grepl(edu_pattern, emails)
hits
## [1]  TRUE FALSE FALSE FALSE  TRUE FALSE

# Keep only the addresses where the mask is TRUE
print(emails[hits])
## [1] "john.doe@ivyleague.edu"   "quant@bigdatacollege.edu"

10 sub & gsub[ | ]

# Sample addresses for the exercise
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# Attempt 1: the pattern consumes the "@" as well, so replacing the match with
# a bare domain glues it straight onto the local part
print(sub("@.*\\.edu$", "datacamp.edu", emails))
## [1] "john.doedatacamp.edu"     "education@world.gov"     
## [3] "dalai.lama@peace.org"     "invalid.edu"             
## [5] "quantdatacamp.edu"        "cookie.monster@sesame.tv"

# Attempt 2: put the "@" back as part of the replacement text
print(sub("@.*\\.edu$", "@datacamp.edu", emails))
## [1] "john.doe@datacamp.edu"    "education@world.gov"     
## [3] "dalai.lama@peace.org"     "invalid.edu"             
## [5] "quant@datacamp.edu"       "cookie.monster@sesame.tv"

11 sub & gsub (2)[ | ]

12 Times and Dates[ | ]

13 Right here, right now[ | ]

# Current calendar date as a Date object: today
# (the outputs shown below are from the run in which these notes were written)
today <- Sys.Date()
today
## [1] "2017-04-14"

# Remove the class to reveal the number stored underneath
today_raw <- unclass(today)
print(today_raw)
## [1] 17270

# Current date and time as a POSIXct object: now
now <- Sys.time()
now
## [1] "2017-04-14 08:29:36 EDT"

# Remove the class to reveal the number stored underneath
now_raw <- unclass(now)
print(now_raw)
## [1] 1492172976

14 Create and format dates[ | ]

# Definition of character strings representing dates
str1 <- "May 23, 96"
str2 <- "2012-3-15"
str3 <- "30/January/2006"

# Convert the strings to dates: date1, date2, date3
#
# Each string is parsed straight into a Date with a format that describes its
# layout. A Date is a pure calendar day with no time-of-day or time zone, so
# the day cannot slip backwards when it is displayed. (The earlier version
# went through as.POSIXct(), which produced midnight UTC and therefore showed
# the previous evening, and the previous weekday, in zones west of UTC.)
#
# NOTE: %b and %B match month names in the session's LC_TIME locale, so
# "May" / "January" need an English (or "C") locale to parse. The outputs
# shown below are for an English locale.

# %b = abbreviated month name, %d = day of month, %y = two-digit year
date1 <- as.Date(str1, format = "%b %d, %y")
date1
## [1] "1996-05-23"

# %Y = four-digit year, %m = month number (a leading zero is optional on input)
date2 <- as.Date(str2, format = "%Y-%m-%d")
date2
## [1] "2012-03-15"

# %B = full month name
date3 <- as.Date(str3, format = "%d/%B/%Y")
date3
## [1] "2006-01-30"

# Convert dates to formatted strings
# %A = full weekday name
format(date1, "%A")
## [1] "Thursday"

format(date2, "%d")
## [1] "15"

format(date3, "%b %Y")
## [1] "Jan 2006"

# convert dates to character data
strDate2 <- as.character(date2)
strDate2
## [1] "2012-03-15"

15 Create and format times[ | ]

# Definition of character strings representing times
str1 <- "2012-3-12 14:23:08"

# Convert the string to a POSIXct object: time1
# The string defined just above (str1) is the one that must be parsed. A
# date-only string has nothing for %H:%M:%S to match and would yield NA.
# No tz is given, so the clock time is interpreted in the session's time zone.
time1 <- as.POSIXct(str1, format = "%Y-%m-%d %H:%M:%S")

# Convert times to formatted strings

# %M = minutes
format(time1, "%M")
## [1] "23"

# %I = hour on the 12-hour clock, %p = AM/PM indicator
# (the text of %p depends on the locale; shown here for an English locale)
format(time1, "%I:%M %p")
## [1] "02:23 PM"

16 Calculations with Dates[ | ]

# day1, day2, day3, day4 and day5
# (the numbering does NOT follow the calendar: day4 is the earliest date)
day1 <- as.Date("2016-11-21")
day2 <- as.Date("2016-11-16")
day3 <- as.Date("2016-11-27")
day4 <- as.Date("2016-11-14")
day5 <- as.Date("2016-12-02")

# Create vector pizza, sorted chronologically so that "first", "last" and
# "consecutive" below refer to calendar order rather than variable numbering
pizza <- sort(c(day1, day2, day3, day4, day5))

# Difference between last and first pizza day
print(pizza[length(pizza)] - pizza[1])

## Time difference of 18 days

# Create differences between consecutive pizza days: day_diff
# (on sorted dates every gap is a non-negative number of days)
day_diff <- diff(pizza, lag = 1, differences = 1)
day_diff

## Time differences in days
## [1] 2 5 6 5

# Average period between two consecutive pizza days
print(mean(day_diff))

## Time difference of 4.5 days

17 Calculations with Times[ | ]

# Session start (login) and end (logout) timestamps, one pair per session
# NOTE(review): no tz argument is passed, so these are parsed in the session's
# time zone; the trailing " UTC" text does not appear to be used as the zone.
# Confirm whether UTC was intended.
login <- as.POSIXct(c(
  "2016-11-18 10:18:04 UTC",
  "2016-11-23 09:14:18 UTC",
  "2016-11-23 12:21:51 UTC",
  "2016-11-23 12:37:24 UTC",
  "2016-11-25 21:37:55 UTC"
))

logout <- as.POSIXct(c(
  "2016-11-18 10:56:29 UTC",
  "2016-11-23 09:14:52 UTC",
  "2016-11-23 12:35:48 UTC",
  "2016-11-23 13:17:22 UTC",
  "2016-11-25 22:08:47 UTC"
))

# Length of each session, logout minus login: time_online
time_online <- difftime(logout, login)

# Show the per-session durations
#class(time_online)
time_online
## Time differences in secs
## [1] 2305   34  837 2398 1852

# Total time spent online across all sessions
print(sum(time_online))
## Time difference of 7426 secs

# Mean session length
print(mean(time_online))
## Time difference of 1485.2 secs

18 Time is of the essence[ | ]

문서 댓글 ({{ doc_comments.length }})
{{ comment.name }} {{ comment.created | snstime }}