library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the private car insurance CSV into a tibble; readr guesses the column
# types and reports them in the message that follows.
PCarIns <- read_csv("PrivateCarIns1975-Data.csv")
Rows: 128 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Cpol.Age, Car.Group, Cveh.Age
dbl (4): Pol.Age, Veh.Age, Avg.Claims, Numb.Claims
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the categories with at least one claim. This drops 5 of the
# 128 rows read above, leaving 123 (filter() also drops rows where the
# condition evaluates to NA).
PCarIns <- PCarIns %>%
  filter(Numb.Claims > 0)
# Inspect the structure of the filtered data
str(PCarIns)
spc_tbl_ [123 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Pol.Age : num [1:123] 1 1 1 1 1 1 1 1 1 1 ...
$ Cpol.Age : chr [1:123] "17-20" "17-20" "17-20" "17-20" ...
$ Car.Group : chr [1:123] "A" "A" "A" "A" ...
$ Veh.Age : num [1:123] 1 2 3 4 1 2 3 4 1 2 ...
$ Cveh.Age : chr [1:123] "0-3" "4-7" "8-9" "10+" ...
$ Avg.Claims : num [1:123] 289 282 133 160 372 249 288 11 189 288 ...
$ Numb.Claims: num [1:123] 8 8 4 1 10 28 1 1 9 13 ...
- attr(*, "spec")=
.. cols(
.. Pol.Age = col_double(),
.. Cpol.Age = col_character(),
.. Car.Group = col_character(),
.. Veh.Age = col_double(),
.. Cveh.Age = col_character(),
.. Avg.Claims = col_double(),
.. Numb.Claims = col_double()
.. )
- attr(*, "problems")=<externalptr>
# Convert the three character grouping columns to factors (categorical).
# NOTE(review): factor() orders levels by sorting the distinct values as
# strings, so for Cveh.Age "10+" will likely sort before "4-7"; pass
# `levels =` explicitly if age order matters downstream.
PCarIns <- PCarIns %>%
  mutate(
    Cpol.Age = factor(Cpol.Age),
    Car.Group = factor(Car.Group),
    Cveh.Age = factor(Cveh.Age)
  )