################################################################################
#
# Name:    VDMML_R_API_ACCESS.TXT
#
# Purpose: Example script for API access to SAS® VDMML using R
#
# History: 7July2022 File Created.
#
# Copyright(c) 2022 SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
#
################################################################################

# Install required packages ----

# Install only the CRAN packages that are not already available, so that
# re-running the script does not re-download every dependency.
cran_packages <- c("httr", "jsonlite", "devtools", "dplyr")
missing_packages <- cran_packages[
  !vapply(cran_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Update the URL as needed to use the latest archive from the R-SWAT releases.
# The archive referenced here is the win-64 build of version 1.6.3; it is only
# installed when no 'swat' package is present, so remove the guard (or remove
# the installed package) to force an upgrade.
if (!requireNamespace("swat", quietly = TRUE)) {
  install.packages(
    "https://github.com/sassoftware/R-swat/releases/download/v1.6.3/R-swat-1.6.3+vb21030-win-64.tar.gz",
    repos = NULL,
    type = "file",
    INSTALL_opts = "--no-multiarch"
  )
}

# Load the packages to be used ----
library(devtools)
library(swat)
library(dplyr)
library(httr)
library(jsonlite)

# Connect to CAS ----
# NOTE(review): no username/password is passed here; presumably credentials
# are picked up from an authinfo file or similar -- confirm for your setup.
s <- CAS(
  "https://mpmprodvdmml.ondemand.sas.com/cas-shared-default-http/",
  443,
  protocol = "https"
)

# Print the server status to make sure we're connected
print(cas.builtins.serverStatus(s))

# The s variable now holds the connection to CAS, which can be used to load
# data and run CAS actions.

# Load a Project Data Sphere Data Set ----
# The Project Data Sphere data sets are automatically available to the
# platform's SAS tools, including connections to CAS through R. First,
# identify the Unique Dataset ID for the data set you are interested in using.
# You can find this ID on the Access Data page for the data set. This example
# uses the Prostate Cancer DREAM Challenge data sets, which have a Unique
# Dataset ID of Prostat_na_2006_149.
# This section expects `s` to be an open CAS connection created with
# swat::CAS(); fail early with a clear message if it is missing.
if (!exists("s")) {
  stop("No CAS connection found: create `s` with CAS() first.", call. = FALSE)
}

# List all files in the caslib
res <- cas.table.fileInfo(
  s,
  allFiles = TRUE,
  caslib = "Prostat_na_2006_149"
)
print(res)

# After listing the files, let's look for the training data files:

# List all files in the dataFiles_859 subdirectory (the subdirectory with the
# training data). The path matches the one used by loadTable below.
res <- cas.table.fileInfo(
  s,
  allFiles = TRUE,
  caslib = "Prostat_na_2006_149",
  path = "dataFiles_859"
)
print(res)

# Let's load one of the tables into CAS:

# Load the table into CAS
res <- cas.table.loadTable(
  s,
  sourceCaslib = "Prostat_na_2006_149",
  casOut = list(name = "core_train", replace = TRUE, caslib = "CASUSER"),
  path = "dataFiles_859/CoreTable_training.csv"
)
print(res)

# Create a CASTable to reference the table we just loaded into CAS
core_train <- defCasTable(s, "core_train", caslib = "CASUSER")

# View the first rows of the table
head(core_train)

# Run a CAS Action to Perform Data Analysis ----
# Let's run a frequency analysis on the STUDYID and DEATH columns in this
# table:

# Load the freqTab action set
loadActionSet(s, actionSet = "freqTab")

# Create a two-way cross tabulation table
# NOTE(review): c(vars = c(...)) builds a named character vector (names
# "vars1", "vars2"), not a nested list -- confirm this is the form the
# freqTab action expects for `tabulate`.
res <- cas.freqTab.freqTab(
  s,
  table = core_train,
  includeMissing = TRUE,
  tabulate = c(vars = c("DEATH", "STUDYID"))
)
print(res)

# Changes to CAS tables are performed in-memory and will not persist once the
# user's session ends. You can save CAS tables to a SAS data set on the
# filesystem:

# Save the CAS table to a .sashdat file in the CASUSER caslib
cas.table.save(
  s,
  table = core_train,
  caslib = "CASUSER",
  name = "core_train.sashdat",
  replace = TRUE
)

# Alternatively, you can save your changes to a CSV file:

# Save the CAS table to a CSV file in the CASUSER caslib
cas.table.save(
  s,
  table = core_train,
  caslib = "CASUSER",
  name = "core_train.csv",
  replace = TRUE
)

# Finishing Up ----
# Once you are done working with a CAS table and have saved any changes you
# want to keep, you should drop it from CAS memory to save system resources:

# Drop the table from memory
cas.table.dropTable(s, name = "core_train", caslib = "CASUSER")