DC Introduction to R - Data frames

# DC Introduction to R
DC Introduction to R - Intro to basics
DC Introduction to R - Vectors
DC Introduction to R - Matrices
DC Introduction to R - Factors
DC Introduction to R - Data frames
DC Introduction to R - Lists

1 What's a data frame?

# Show the complete built-in mtcars data set (auto-printed at top level)
mtcars
# Expected console output:
#                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
# Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
# Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
# Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
# Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
# Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
# Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
# Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
# Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
# Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
# Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
# Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
# Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
# Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
# Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
# Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
# Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
# Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
# Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
# Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
# AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
# Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
# Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
# Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
# Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
# Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
# Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
# Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
# Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
# Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

2 Quick, have a look at your data set

# Preview only the leading rows of mtcars (six rows, head()'s default)
head(mtcars, n = 6L)
# Expected console output:
#                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
# Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
# Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
# Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
# Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

3 Have a look at the structure

# Summarise the structure of mtcars: its dimensions and each column's type
utils::str(mtcars)
# Expected console output:
# 'data.frame':	32 obs. of  11 variables:
#  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
#  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
#  $ disp: num  160 160 108 258 360 ...
#  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
#  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
#  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
#  $ qsec: num  16.5 17 18.6 19.4 17 ...
#  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
#  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
#  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
#  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

4 Creating a data frame

# One vector per planet attribute; position i in every vector describes
# the same planet
name <- c(
  "Mercury", "Venus", "Earth", "Mars",
  "Jupiter", "Saturn", "Uranus", "Neptune"
)
# First four entries are terrestrial planets, last four are gas giants
type <- rep(c("Terrestrial planet", "Gas giant"), each = 4)
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
# In this data set only the last four planets are flagged as having rings
rings <- rep(c(FALSE, TRUE), each = 4)

# Combine the vectors column-wise; every column is named after its vector
planets_df <- data.frame(
  name = name,
  type = type,
  diameter = diameter,
  rotation = rotation,
  rings = rings
)

5 Creating a data frame (2)

# Inspect planets_df: number of observations/variables and each column's type
utils::str(planets_df)
# Expected console output (name and type are reported as Factor here; if
# planets_df was built with the stringsAsFactors = FALSE default of R >= 4.0,
# they would be reported as chr instead):
#'data.frame':	8 obs. of  5 variables:
# $ name    : Factor w/ 8 levels "Earth","Jupiter",..: 4 8 1 3 2 6 7 5
# $ type    : Factor w/ 2 levels "Gas giant","Terrestrial planet": 2 2 2 2 1 1 1 1
# $ diameter: num  0.382 0.949 1 0.532 11.209 ...
# $ rotation: num  58.64 -243.02 1 1.03 0.41 ...
# $ rings   : logi  FALSE FALSE FALSE FALSE TRUE TRUE ...

6 Selection of data frame elements

# planets_df (built in the previous exercise) is already in the workspace

# Mercury's diameter: the single cell at row 1, column 3
print(planets_df[1L, 3L])
# [1] 0.382

# Every column for Mars, i.e. the whole of row 4
print(planets_df[4L, ])
#   name               type diameter rotation rings
# 4 Mars Terrestrial planet    0.532     1.03 FALSE

7 Selection of data frame elements (2)

# planets_df (built in an earlier exercise) is already in the workspace

# Diameters of the first five planets: rows 1 to 5 of the diameter column
print(planets_df[seq_len(5), "diameter"])
# [1]  0.382  0.949  1.000  0.532 11.209

8 Only planets with rings

# planets_df is already available in the workspace

# Pull the rings column out of planets_df as a plain logical vector
rings_vector <- planets_df[["rings"]]

# Show the extracted vector
print(rings_vector)
# [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE

9 Only planets with rings (2)

# planets_df and rings_vector are already available in the workspace

# Keep every column for the planets that have rings: rings_vector is the
# logical row filter, and the empty column index selects all columns
# (indexing with "name" instead would return only the planet names)
planets_df[rings_vector, ]
#     name      type diameter rotation rings
#5 Jupiter Gas giant   11.209     0.41  TRUE
#6  Saturn Gas giant    9.449     0.43  TRUE
#7  Uranus Gas giant    4.007    -0.72  TRUE
#8 Neptune Gas giant    3.883     0.67  TRUE

10 Only planets with rings but shorter

# planets_df is already available in the workspace

# Keep only the rows whose diameter is below 1; subset() evaluates the
# condition using the columns of planets_df
subset(planets_df, diameter < 1)
#      name               type diameter rotation rings
# 1 Mercury Terrestrial planet    0.382    58.64 FALSE
# 2   Venus Terrestrial planet    0.949  -243.02 FALSE
# 4    Mars Terrestrial planet    0.532     1.03 FALSE

11 Sorting

# Play around with the order function in the console

12 Sorting your data frame

# planets_df is already available in the workspace

# order() yields the row indices that arrange diameter from smallest to largest
positions <- with(planets_df, order(diameter))

# Index the rows by positions to show planets_df sorted by ascending diameter
planets_df[positions, ]
#      name               type diameter rotation rings
# 1 Mercury Terrestrial planet    0.382    58.64 FALSE
# 4    Mars Terrestrial planet    0.532     1.03 FALSE
# 2   Venus Terrestrial planet    0.949  -243.02 FALSE
# 3   Earth Terrestrial planet    1.000     1.00 FALSE
# 8 Neptune          Gas giant    3.883     0.67  TRUE
# 7  Uranus          Gas giant    4.007    -0.72  TRUE
# 6  Saturn          Gas giant    9.449     0.43  TRUE
# 5 Jupiter          Gas giant   11.209     0.41  TRUE
문서 댓글 ({{ doc_comments.length }})
{{ comment.name }} {{ comment.created | snstime }}