---
title: "Cross-Validation for Symbolic Regression"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Cross-Validation for Symbolic Regression}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

``` r
library(leaf)

# Point the reader at the installer when the backend is missing.
if (!backend_available()) {
  message("Install backend with leaf::install_leaf()")
}
```

``` r
# Fixed seed so the simulated data below is reproducible.
set.seed(42)
N <- 50L # Number of data points

# Features
x1 <- runif(N, min = 0, max = 5)
x2 <- runif(N, min = 0, max = 1)

# Target: y = 2 * x1^2 - 3 * x2 + noise
y_b <- 2 * x1^2 - 3 * x2 + rnorm(N, mean = 0, sd = 1e-2)

train_data <- data.frame(x1 = x1, x2 = x2, y = y_b)
head(train_data)
#>         x1         x2         y
#> 1 4.574030 0.33342721 40.846442
#> 2 4.685377 0.34674825 42.857433
#> 3 1.430698 0.39848541  2.914093
#> 4 4.152238 0.78469278 32.134514
#> 5 3.208728 0.03893649 20.475954
#> 6 2.595480 0.74879539 11.229410
```

``` r
# Initialize the symbolic regressor
regressor <- leaf::SymbolicRegressor$new(
  engine = "rsrm",
  num_iterations = 4L,
  loss = "MSE",
  max_params = 3,
  threshold = 1e-10,
  base = list(verbose = FALSE),
  mcts = list(times = 8),
  gp = list(times = 8)
)
```

``` r
# Stage 1: Discover equation skeletons
# The formula names the target (y) and the candidate inputs (x1, x2).
search_results <- regressor$search_equations(
  data = train_data,
  formula = "y ~ f(x1, x2)"
)
#> 1. Processing data for equation search based on formula...
#> 2. Running engine 'rsrm' over 1 folds using up to 1 processes...
#> -- FINAL RESULTS --
#> Episode: 4/4
#> time: 18.18s
#> loss: 7.798843366550043e-05
#> form: F
#> HOF:
#>                                  equation  complexity  loss
#> 0                                       0           0  999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1                                 21.0992           1  273.35
#> 2                              13.2467*X1           2  41.72
#> 3                         4.4235*X1**2.25           3  1.04
#> 4                   5.8143*X1**2 - 1.3268           4  0.71
#> 5                5.8201*X1**2 - 0.9692*X2           5  0.00
#> 6       5.8202*X1**2 - 0.9690*X2 - 0.0007           6  0.00
#> 7    5.8200*X1**2 - 0.9686*X2 - 0.0002/X1           8  0.00
#> 8 5.8200*X1**2 - 0.9686*X2 - 0.0000/X1**2           9  0.00
#> ---
#> task:dataset_28e9753b-c27d-43d3-9c6d-481d58f40509 expr:5.819963910966452*X1**2 + -0.9686370971634156*X2 + -2.06850213849703e-06/X1**2 Loss_MSE:0.00 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 18.191 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[1478, 0, 1478]]
#> 3. Found 9 raw skeletons. Deduplicating...

print("=== Search results ===")
#> [1] "=== Search results ==="
print(search_results)
#>                           Equation Complexity
#> 0                               β1          1
#> 1                            β1⋅x1          2
#> 2                       β1⋅x1^2.25          3
#> 3                  β1⋅x1^2 + -1⋅β2          4
#> 4               β1⋅x1^2 + -1⋅β2⋅x2          5
#> 5       β1⋅x1^2 + -1⋅β2⋅x2 + -1⋅β3          6
#> 6 β1⋅x1^2 + -1⋅β2⋅x2 + -1⋅β3⋅x1^-1          8
```

``` r
# Stage 2: Fit parameters and compute loss (with CV)
# do_cv = TRUE with n_cv_folds = 5 adds the `Loss (cv)` column shown below.
fit_results <- regressor$fit(
  data = train_data,
  do_cv = TRUE,
  n_cv_folds = 5
)
#> Fitting parameters for 7 equations...
#> - Performing CV for Equation with ID=0...
#> - Performing CV for Equation with ID=1...
#> - Performing CV for Equation with ID=2...
#> - Performing CV for Equation with ID=3...
#> - Performing CV for Equation with ID=4...
#> - Performing CV for Equation with ID=5...
#> - Performing CV for Equation with ID=6...
#> Parameter fitting complete.

# print() shows escapes such as "\n" literally, so the banner has none.
print("=== Fit & CV results ===")
#> [1] "=== Fit & CV results ==="
print(fit_results)
#>                           Equation Complexity         Loss    Loss (cv)
#> 0                               β1          1 2.733529e+02 2.802430e+02
#> 1                            β1⋅x1          2 4.172247e+01 4.281000e+01
#> 2                       β1⋅x1^2.25          3 1.040410e+00 1.084084e+00
#> 3                  β1⋅x1^2 + -1⋅β2          4 7.098531e-01 7.611947e-01
#> 4               β1⋅x1^2 + -1⋅β2⋅x2          5 8.269706e-05 9.661740e-05
#> 5       β1⋅x1^2 + -1⋅β2⋅x2 + -1⋅β3          6 8.261101e-05 9.799049e-05
#> 6 β1⋅x1^2 + -1⋅β2⋅x2 + -1⋅β3⋅x1^-1          8 7.868977e-05 9.192478e-05
```

``` r
# Stage 3: Evaluate additional metrics
# `metrics` requests AIC and Elbow; `metrics_cv` requests R2 and Elbow,
# which appear below as the `(cv)` columns.
eval_table <- regressor$evaluate(
  metrics = c("AIC", "Elbow"),
  metrics_cv = c("R2", "Elbow")
)

print("=== Additional metrics ===")
#> [1] "=== Additional metrics ==="
print(eval_table)
#>                           Equation Complexity         Loss    Loss (cv)
#> 0                               β1          1 2.733529e+02 2.802430e+02
#> 1                            β1⋅x1          2 4.172247e+01 4.281000e+01
#> 2                       β1⋅x1^2.25          3 1.040410e+00 1.084084e+00
#> 3                  β1⋅x1^2 + -1⋅β2          4 7.098531e-01 7.611947e-01
#> 4               β1⋅x1^2 + -1⋅β2⋅x2          5 8.269706e-05 9.661740e-05
#> 6 β1⋅x1^2 + -1⋅β2⋅x2 + -1⋅β3⋅x1^-1          8 7.868977e-05 9.192478e-05
#>         AIC         AIC_w        Elbow     R2 (cv)  Elbow (cv)
#> 0 424.43203 2.343930e-111 4.675572e-03 -0.02520608 0.004560906
#> 1 330.44585  6.008769e-91 2.827208e-02  0.84338925 0.027575991
#> 2 145.87460  7.209807e-51 7.920838e-01  0.99603412 0.796609238
#> 3 128.75899  3.754305e-47 7.518792e-01  0.99721534 0.850040401
#> 4 -83.93372  5.756593e-01 3.929556e-01  0.99999965 0.414162284
#> 6 -81.94105  2.125508e-01 1.248170e-06  0.99999966 0.119702364
```