
# Author: Gabriela Shirkey
# Date: 05/11/23
# Subject: Supplemental information; PLS-SEM code
# Disclaimer/notes: This work refers to Ch.3 of the dissertation, specifically the PLS-SEM 2.0 following revisions with the theoretical framework, PCA, and evaluations for model fit.



# Library -----------------------------------------------------------------

library(seminr)
library(dplyr)

# [USER INPUT NEEDED] --------------------------------------------------
#Set WD and data sources
# Folder that holds PLSSEM_df.csv. Leave as "" to stay in the current working
# directory: setwd("") is an error in R, so the call is skipped in that case.
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}

# Load data; stop early with a clear message if the input file cannot be found.
data_file <- "PLSSEM_df.csv"
if (!file.exists(data_file)) {
  stop("Cannot find '", data_file, "' in ", getwd(),
       "; set data_dir to the folder that contains it.", call. = FALSE)
}
data <- read.csv(data_file, stringsAsFactors = FALSE)



#[STEP 1] Clean and organize data -------------------------------------------------

#remove what we're not trying to measure
  data <- dplyr::select(data, -c(year, county))
  names(data)

  # Lookup of indicator name (as required by the PLS-SEM package) -> raw column.
  # The prefix identifies the construct the indicator belongs to; the order of
  # this vector is also the column order of the final data frame. Notes explain
  # why some candidate variables are not included.
  indicator_lookup <- c(
    # LC_: land cover change
    LC_1 = "WAT",  # Percent water land cover
    LC_2 = "FOR",  # Percent forest cover
    # REMOVED RHD: rural housing landscape had a low loading (0.47) and is
    # correlated with SC; it must be removed to improve the HTMT.

    # LM_: land management
    LM_1 = "FD",   # Farm density*
    LM_2 = "CRP",  # Enrollment in the Conservation Reserve Program
    LM_3 = "FOW",  # Farms owned rather than rented
    LM_4 = "FLD",  # Farmland density
    LM_5 = "CRO",  # Percent cropland land cover
    # REMOVED SAS, SAD, NT (soils and no-till): low loadings.
    # NOTE(review): SAS and NT are still used as SO_1 / SO_2 below, so this
    # presumably means they were removed from this construct only - confirm.

    # RD_: regional development
    RD_1 = "IRR",  # Percent irrigated land
    RD_2 = "URB",  # Rural housing density, homes per km-2
                   # NOTE(review): the column name suggests urban - confirm.
    RD_3 = "FINO", # Farm income per operation km-2
    RD_4 = "NFN",  # Non-farm N application estimates kg km-2
    RD_5 = "POPD", # Population density
    # REMOVED URD: urban housing density, the rhoA was too high.

    # WS_: water stress
    WS_1 = "VPD",
    # REMOVED WET: percent wetlands, low loading (0.222).

    # SO_: soil composition
    SO_1 = "SAS",
    SO_2 = "NT",
    # Not used: CLS (loading 0.452) and OMD (loading 0.252);
    # PHS, SIS removed because of high rhoA;
    # CLD, PHD, SID removed because HTMT is too close to LCC and RD.

    # NU_: soil and plant nutrients
    NU_1 = "FP",
    # REMOVED NFP (NU_2) and FN (NU_3): low loading & conflict.

    # TI_: tillage
    TI_1 = "CVT",

    # HS_: heat stress
    HS_1 = "TPM",

    # Outcome; keeps its own name.
    NPP = "NPP"
  )

  # Rename the raw columns, then keep only the modelled indicators in lookup order.
  data <- data %>%
    dplyr::rename(dplyr::all_of(indicator_lookup)) %>%
    dplyr::select(dplyr::all_of(names(indicator_lookup)))

names(data)
   


#[STEP 2] Build the PLS-SEM measurement and structural models -------------------------------------------
  #SEMinR uses constructs() to specify the list of all construct measurement models;
  #composite() specifies the measurement of individual constructs
#

# Design the relationships in the measurement model 
    #First build all model constructs
    #Reflective constructs use mode_A and formative constructs use mode_B
    #CB-SEM uses reflective common-factor constructs, not composites as PLS-SEM does
    
    # Build each construct on its own, then combine them (in the same order)
    # into the measurement model. All constructs are composites; single-item
    # constructs are measured by exactly one indicator.
    regional_development <- composite("Regional Development", multi_items("RD_", 1:5))
    land_management      <- composite("Land Management", multi_items("LM_", 1:5))
    water_stress         <- composite("Water Stress", single_item("WS_1"))
    land_cover_change    <- composite("Land Cover Change", multi_items("LC_", 1:2))
    soil_composition     <- composite("Soil Composition", multi_items("SO_", 1:2))
    nutrients            <- composite("Soil and Plant Nutrients", single_item("NU_1"))
    heat_stress          <- composite("Heat Stress", single_item("HS_1"))
    tillage              <- composite("Tillage", single_item("TI_1"))
    npp                  <- composite("NPP", single_item("NPP"))

    # Moderation of Land Cover Change by Land Management (two-stage method).
    # The structural model refers to it as "Land Cover Change*Land Management".
    lcc_by_lm <- interaction_term(
      iv = "Land Cover Change",
      moderator = "Land Management",
      method = two_stage
    )

    simple_mm <- constructs(
      regional_development,
      land_management,
      water_stress,
      land_cover_change,
      soil_composition,
      nutrients,
      heat_stress,
      tillage,
      npp,
      lcc_by_lm
    )
    


# Design the relationships in the structural model

    # Constructs that both exogenous drivers point to.
    driver_targets <- c("Land Management", "Land Cover Change")

    # Every construct with a direct path to NPP, including the interaction term.
    npp_predictors <- c(
      "Land Management",
      "Land Cover Change",
      "Water Stress",
      "Land Cover Change*Land Management",
      "Soil and Plant Nutrients",
      "Heat Stress",
      "Tillage"
    )

    simple_sm <- relationships(
      paths(from = "Regional Development", to = driver_targets),
      paths(from = "Soil Composition", to = driver_targets),
      paths(from = npp_predictors, to = "NPP")
    )


      
#[STEP 3] Estimate PLS-SEM arguments ------------------------------------------------------
# Before analysis (STEP 4) check for convergence
##convergence: the stop criterion of the algorithm was reached and not the maximum number of iterations

  #First estimate the PLS
  # Estimate the PLS model; NA entries are treated as missing and handled
  # with mean replacement.
  simple_model <- estimate_pls(
    data = data,
    measurement_model = simple_mm,
    structural_model = simple_sm,
    missing = mean_replacement,
    missing_value = NA
  )

  # Keep the model summary for the later steps and print it.
  summary_simple <- summary(simple_model)
  summary_simple

  # Plot the estimated model.
  plot(simple_model)

  # Number of iterations the algorithm needed: should be lower than 300.
  # (This model converged in 7.)
  summary_simple$iterations
    #converged in 7




#[STEP 4] Inspect the PLS-SEM Measurement Model -------------------------------------------


#Inspect the summary paths
  summary_simple$paths

# Indicator reliability: how much of each indicator's variance is explained by
# its construct?
  # This is the squared indicator loading. Loadings > 0.708 are recommended,
  # because their square exceeds 0.50, i.e. the construct explains more than
  # 50% of the indicator's variance.
  indicator_loadings <- summary_simple$loadings
  indicator_loadings
  indicator_loadings^2
  #write.csv(summary_simple$loadings, "G:/My Drive/Dissertation/research_ch2/PCA_SEM/PCA_revised_loadings.csv")


# Internal consistency reliability & convergent validity
  summary_simple$reliability
  # How well do the indicators associate with one another?
  #   Three separate measures are considered.
  #
  #   (1) Joreskog's (1971) composite reliability rhoC. Values between 0.60 and
  #       0.70 are considered acceptable in exploratory research, 0.70-0.90 are
  #       satisfactory to good, 0.90-0.94 can be problematic, and values >0.95
  #       indicate redundant indicators and reduced construct validity
  #       (Diamantopoulos et al., 2012).
  #
  #   (2) Cronbach's alpha uses the same thresholds as rhoC. It serves as a
  #       conservative lower bound: it assumes all indicator loadings are equal
  #       in the population (tau-equivalence), and any violation yields a value
  #       lower than rhoC.
  #
  #   (3) As a balance between the two, use Dijkstra's (2010, 2014, 2015) rhoA
  #       (Hair et al., 2021).
  #
  # Does the construct converge to explain the variation of all indicators?
  #   Use AVE to inspect convergent validity, i.e. the extent to which the
  #   construct converges to explain the variance of its indicators. AVE is the
  #   mean of the squared loadings of the indicators associated with the
  #   construct, so it is equivalent to the communality of the construct.
  #   Acceptable values are >0.50 (the construct explains more than 50% of the
  #   variance of its indicators).


# Are the constructs statistically unique?
  htmt_ratios <- summary_simple$validity$htmt
  htmt_ratios
  # HTMT should be below 0.90 for conceptually similar constructs, 0.85 otherwise.


#[STEP 5] Bootstrapping -----------------------------------------------------------
# We want to know which parts are significant. 
# Bootstrapping estimates standard errors and computes confidence intervals.
# Bootstrap the estimate_pls() model; use 1,000 resamples for a quick analysis, but the final run should use 10,000

      #bootstrap the model
      # Bootstrap the estimated model. The fixed seed keeps the resampling
      # reproducible between runs.
      boot_simple <- bootstrap_model(seminr_model = simple_model,
                                     nboot = 1000, # change to 10,000 for the final analysis
                                     cores = NULL,
                                     seed = 123)
      # Store and print the summary of the bootstrapped model. With alpha = 0.1
      # the interval columns are the 5% and 95% bounds.
      (summary_boot <- summary(boot_simple, alpha = 0.1))
      plot(boot_simple)

# Inspect the bootstrapped results
      # Bootstrapped path coefficients.
      # NOTE(review): an earlier comment labelled this table "RSquares"; in seminr
      # the R-squares appear to be reported in summary_simple$paths - confirm.
      summary_boot$bootstrapped_paths
      # Effect sizes (f-square) from the original (non-bootstrapped) estimation.
      summary_simple$fSquare
      # Indicator reliability: how much of each indicator's variance is explained
      # by the construct? Only the estimate columns are squared; squaring the
      # Bootstrap SD, T Stat. or CI columns does not give a meaningful quantity.
      summary_boot$bootstrapped_loadings[, c("Original Est.", "Bootstrap Mean"), drop = FALSE]^2
      # Are the constructs statistically unique?
      summary_boot$bootstrapped_HTMT

#Check the moderator/interaction effect using slope analysis (OPTIONAL)

# Simple slope analysis plot for the Land Cover Change x Land Management
# interaction on NPP, with the legend placed in the top-left corner.
        moderated_model <- slope_analysis(
          simple_model,
          dv = "NPP",
          moderator = "Land Management",
          iv = "Land Cover Change",
          leg_place = "topleft"
        )
        # Print whatever slope_analysis() returned.
        moderated_model


#[STEP 6] Evaluation of PLS structural model --------------------------------------
       
# Inspect the structural model collinearity VIF 
        #(evaluation of the collinearity of predictor constructs in relation to each endogenous construct.)
        #Not required for PLS-SEM reflective models, but interesting to see
        summary_simple$vif_antecedents

# Inspect the bootstrapped structural paths
  summary_boot$bootstrapped_paths
# Inspect the bootstrapped total effects
  summary_boot$bootstrapped_total_paths


# Inspect the total indirect effects (original estimation)
  summary_simple$total_indirect_effects

# Inspect the specific indirect effects of Regional Development and Soil
# Composition on NPP through Land Management. Both tests share the same
# mediator, outcome and alpha (0.1, matching the bootstrap summary).
      indirect_mediator <- "Land Management"
      indirect_outcome <- "NPP"
      indirect_alpha <- 0.1

      specific_effect_significance(
        boot_simple,
        from = "Regional Development",
        through = indirect_mediator,
        to = indirect_outcome,
        alpha = indirect_alpha
      )
      # Recorded result:
      # Original Est. Bootstrap Mean   Bootstrap SD        T Stat.          5% CI         95% CI
      # 0.2929634      0.2967660      0.0774797      3.7811630      0.1664293      0.4177719

      specific_effect_significance(
        boot_simple,
        from = "Soil Composition",
        through = indirect_mediator,
        to = indirect_outcome,
        alpha = indirect_alpha
      )
      # Recorded result:
      # Original Est. Bootstrap Mean   Bootstrap SD        T Stat.          5% CI         95% CI
      # -0.29807920    -0.30530112     0.08766295    -3.40028715    -0.44885078    -0.16522768



    # Generate the model predictions if the model is strong enough -- this one isn't
        #predict_simple_model <- predict_pls( model = simple_model, technique = predict_DA, noFolds = 10, reps = 10)
        # Summarize the prediction results 
        #(sum_predict_simple_model <- summary(predict_simple_model))


#[STEP 7] USER-INPUT NEEDED Write the reports -------------------------------------------------------

# write.csv(summary_boot$bootstrapped_total_paths, file= ".csv", row.names = FALSE)
# write.csv(summary_boot$bootstrapped_loadings, file= ".csv", row.names = FALSE)
# write.csv(summary_boot$bootstrapped_HTMT, file= ".csv", row.names = FALSE)
# write.csv(summary_boot$bootstrapped_loadings^2, file= ".csv", row.names = FALSE)



###END####