# R Machine learning Code:
library(mlr3verse)

# Import dataset ----
# A full path is required when this script runs under Power BI.
# `read_csv()` is namespace-qualified because `library(mlr3verse)` does not
# attach readr; a bare `read_csv()` only works if readr is already loaded.
df_sales <- readr::read_csv(
  "C:/Users/evancarey/Dropbox/Work/BHAnalytics/courseware/PowerBI/PowerBI_R/PredAnalyticsWebinar/data/customer_sales1.csv"
)
# Prepare the data and define the classification task ----
# Drop the sale amount: it carries information about the outcome itself, so
# it must not be available to the models as a feature.
df_sales[["sale_amount"]] <- NULL

# The target is stored as a factor for classification.
df_sales[["sale"]] <- factor(df_sales[["sale"]])

# The transaction date is converted to a plain number before the task is built.
df_sales[["transaction_Date"]] <- as.numeric(df_sales[["transaction_Date"]])

# Classification task with `sale` as the target and every remaining column
# of `df_sales` as a feature.
task <- as_task_classif(x = df_sales, target = "sale", id = "sales")
# Inspect the task ----
# Printing the task shows its summary.
task

# Class balance of the target: absolute counts first, then proportions.
sale_counts <- table(task$truth())
sale_counts
prop.table(sale_counts)

# Per-column summary of the data held by the task.
skimr::skim(task$data())


# Benchmark candidate models ----
# A random forest (ranger) and a logistic regression are compared against a
# featureless baseline. All learners predict probabilities and keep their
# predictions for both the train and the test sets.
candidate_learners <- lrns(
  c("classif.ranger", "classif.log_reg", "classif.featureless"),
  predict_type = "prob",
  predict_sets = c("train", "test")
)

# Every learner is evaluated on the task with 3-fold cross-validation.
design <- benchmark_grid(
  tasks = task,
  learners = candidate_learners,
  resamplings = rsmps("cv", folds = 3)
)

# Execute the models.
bmr <- benchmark(design)

# View the results: AUC computed on the test-set predictions, with the x-axis
# labels rotated so the learner names stay readable.
auc_test <- msr("classif.auc", predict_sets = "test", id = "auc_test")
autoplot(bmr, measure = auc_test) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))

# Fit the chosen model and score every row ----
# Logistic regression is hard-coded here as the model to use; it is not
# selected programmatically from the benchmark results.
learner <- lrn("classif.log_reg", predict_type = "prob")
learner$train(task)
# Check it has been trained: the state holds the fitted model.
learner$state

# Predict on the same task the learner was trained on, so these are
# in-sample predictions.
predictions_df <- as.data.table(learner$predict(task))

# The probability column is named after the target level. Without this check,
# a target with no level called "Yes" would make the lookup return NULL and
# the assignment below would silently add no column at all.
if (!"prob.Yes" %in% names(predictions_df)) {
  stop(
    "No 'prob.Yes' column in the predictions; available columns: ",
    paste(names(predictions_df), collapse = ", "),
    call. = FALSE
  )
}

# `[[` matches the column name exactly (no partial matching as with `$`).
df_sales$predicted_Sale <- predictions_df[["prob.Yes"]]

