library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
PCarIns <- read_csv("PrivateCarIns1975-Data.csv")
Rows: 128 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Cpol.Age, Car.Group, Cveh.Age
dbl (4): Pol.Age, Veh.Age, Avg.Claims, Numb.Claims
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PCarIns <- PCarIns %>% filter(Numb.Claims>0) # remove the 5 categories with no claims
str(PCarIns)
spc_tbl_ [123 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Pol.Age    : num [1:123] 1 1 1 1 1 1 1 1 1 1 ...
 $ Cpol.Age   : chr [1:123] "17-20" "17-20" "17-20" "17-20" ...
 $ Car.Group  : chr [1:123] "A" "A" "A" "A" ...
 $ Veh.Age    : num [1:123] 1 2 3 4 1 2 3 4 1 2 ...
 $ Cveh.Age   : chr [1:123] "0-3" "4-7" "8-9" "10+" ...
 $ Avg.Claims : num [1:123] 289 282 133 160 372 249 288 11 189 288 ...
 $ Numb.Claims: num [1:123] 8 8 4 1 10 28 1 1 9 13 ...
 - attr(*, "spec")=
  .. cols(
  ..   Pol.Age = col_double(),
  ..   Cpol.Age = col_character(),
  ..   Car.Group = col_character(),
  ..   Veh.Age = col_double(),
  ..   Cveh.Age = col_character(),
  ..   Avg.Claims = col_double(),
  ..   Numb.Claims = col_double()
  .. )
 - attr(*, "problems")=<externalptr>
# convert to categorical
# Turn the three label columns into factors.
# The age-band labels are ordered by their numeric code columns (Pol.Age,
# Veh.Age) instead of alphabetically: plain factor() would sort "10+" ahead of
# "4-7" and "8-9", scrambling the vehicle-age bands in tables, plots and model
# output. The first (reference) level is unchanged by this reordering.
# NOTE(review): assumes Pol.Age / Veh.Age are the numeric codes of the
# Cpol.Age / Cveh.Age bands, as the str() output suggests; confirm.
PCarIns <- PCarIns %>%
  mutate(
    Cpol.Age = fct_reorder(Cpol.Age, Pol.Age),
    Car.Group = factor(Car.Group),
    Cveh.Age = fct_reorder(Cveh.Age, Veh.Age)
  )






