################################################################################
#                                                                              #
# Name: VDMML_R_API_ACCESS.TXT                                                 #
#                                                                              #
# Purpose: Example script for API access to SAS® VDMML using R                 #
#                                                                              #
# History: 7July2022 File Created.                                             #
#                                                                              #
# Copyright(c) 2022 SAS Institute Inc., Cary, NC, USA. All Rights Reserved.    #
#                                                                              #
################################################################################

# Install required packages.
# Each package is installed only when it is not already available, so that
# re-running this script does not re-download and reinstall everything.
for (pkg in c("httr", "jsonlite", "devtools", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Install R-SWAT from its GitHub release archive when it is not yet installed.
# Update the URL as needed to use the latest archive (.tar.gz) from the R-SWAT
# releases page; the file referenced here is the 64-bit Windows build.
# NOTE: an already-installed swat package is left as-is; remove it first (or
# run install.packages() manually) to move to a newer release.
if (!requireNamespace("swat", quietly = TRUE)) {
  install.packages(
    "https://github.com/sassoftware/R-swat/releases/download/v1.6.3/R-swat-1.6.3+vb21030-win-64.tar.gz",
    repos = NULL,
    type = "file",
    INSTALL_opts = "--no-multiarch"
  )
}

# Load the packages to be used
library(devtools)
library(swat) 
library(dplyr)
library(httr)
library(jsonlite)

# Open a connection to the CAS server over HTTPS (port 443).
s <- CAS(
  "https://mpmprodvdmml.ondemand.sas.com/cas-shared-default-http/",
  443,
  protocol = "https"
)

# Print the server status as a quick check that the connection works.
print(cas.builtins.serverStatus(s))

# From here on, `s` holds the CAS connection; it is passed to the calls that
# load data and run CAS actions.

# Load a Project Data Sphere Data Set 
# The Project Data Sphere data sets are automatically available to the platform's SAS tools, including connections to CAS through R. First, identify the Unique Dataset ID for the data set you are interested in using.
# You can find this ID on the Access Data page for the data set. This example uses the Prostate Cancer DREAM Challenge data set, which has a Unique Dataset ID of Prostat_na_2006_149.

# List all files in the data set's caslib.
res <- cas.table.fileInfo(s, allFiles = TRUE, caslib = "Prostat_na_2006_149")
print(res)

# After listing the files, look for the training data files by listing the
# contents of the dataFiles_859 subdirectory. This is the same subdirectory
# that CoreTable_training.csv is loaded from later in this script.
# NOTE(review): the subdirectory ID may differ for other data sets; take it
# from the caslib listing printed above.
res <- cas.table.fileInfo(
  s,
  allFiles = TRUE,
  caslib = "Prostat_na_2006_149",
  path = "dataFiles_859"
)
print(res)

# Load one of the training tables into CAS.
# The CSV file is read from the data set's caslib and the result is written to
# the CASUSER caslib under the name 'core_train' (with replace = TRUE).
res <- cas.table.loadTable(
  s,
  path = "dataFiles_859/CoreTable_training.csv",
  sourceCaslib = "Prostat_na_2006_149",
  casOut = list(name = "core_train", replace = TRUE, caslib = "CASUSER")
)
print(res)

# Define a CASTable object that references the table just loaded into CAS.
core_train <- defCasTable(s, "core_train", caslib = "CASUSER")

# Preview the first rows of the table (head() is called with its default
# row count).
head(core_train)

# Run a CAS action to perform data analysis: a frequency analysis of the
# DEATH and STUDYID columns of the loaded table.

# Load the freqTab action set into the session.
loadActionSet(s, actionSet = "freqTab")

# Request a two-way cross tabulation of DEATH by STUDYID.
# `tabulate` is given as a list of tabulation requests, each of which is a
# list carrying a `vars` entry. Writing c(vars = c("DEATH", "STUDYID")) instead
# would flatten to a character vector named vars1/vars2, so no `vars` entry
# would be sent to the action.
# NOTE(review): confirm the request shape against the freqTab action reference.
res <- cas.freqTab.freqTab(
  s,
  table = core_train,
  includeMissing = TRUE,
  tabulate = list(list(vars = list("DEATH", "STUDYID")))
)
print(res)

# Changes to CAS tables are made in memory and do not persist once the user's
# session ends. To keep them, save the CAS table to a file in a caslib.

# Option 1: save the CAS table as a .sashdat file in the CASUSER caslib.
cas.table.save(
  s,
  table = core_train,
  name = "core_train.sashdat",
  caslib = "CASUSER",
  replace = TRUE
)

# Option 2: save the CAS table as a CSV file in the CASUSER caslib.
cas.table.save(
  s,
  table = core_train,
  name = "core_train.csv",
  caslib = "CASUSER",
  replace = TRUE
)

# Finishing up.
# When you are done working with a CAS table and have saved any changes you
# want to keep, drop it from CAS memory to free system resources.
cas.table.dropTable(
  s,
  caslib = "CASUSER",
  name = "core_train"
)