break_num <-as.numeric(factor(bone_breaks, levels = break_type))#surgery cost to fix broken bonesurgery_cost <-round(1500+ age *10+ break_num *200+rnorm(n, mean =0, sd =50), 0)#time in hours for surgerysurgery_time <-round(12- age *0.1+rnorm(n, mean =0, sd =5), 1)#percent of recovery without complicationsrecovery <-round(5+ surgery_cost *0.005- surgery_time *0.05+rnorm(n, mean =0, sd =1), 1)#data frame for break databreak_data <-data.frame(age, break_type, surgery_cost, surgery_time, recovery)summary(break_data)
age break_type surgery_cost surgery_time
Min. :10.00 Length:1200 Min. :1729 Min. :-7.00
1st Qu.:12.00 Class :character 1st Qu.:2148 1st Qu.: 7.40
Median :14.00 Mode :character Median :2292 Median :10.60
Mean :14.01 Mean :2296 Mean :10.58
3rd Qu.:16.00 3rd Qu.:2487 3rd Qu.:13.70
Max. :18.00 Max. :2823 Max. :32.20
recovery
Min. :11.00
1st Qu.:14.90
Median :16.10
Mean :16.01
3rd Qu.:17.20
Max. :20.50
#surgery cost by ageggplot(break_data, aes(x = age, y = surgery_cost)) +geom_point() +geom_smooth(method ="lm")
`geom_smooth()` using formula = 'y ~ x'
#surgery cost by break typeggplot(break_data, aes(x = break_type, y = surgery_cost)) +geom_boxplot()
#surgery time by ageggplot(break_data, aes(x = age, y = surgery_time)) +geom_point() +geom_smooth(method ="lm")
`geom_smooth()` using formula = 'y ~ x'
#surgery cost by timeggplot(break_data, aes(x = surgery_time, y = surgery_cost)) +geom_point() +geom_smooth(method ="lm")
`geom_smooth()` using formula = 'y ~ x'
#recovery rate by surgery timeggplot(break_data, aes(x = surgery_time, y = recovery)) +geom_point() +geom_smooth(method ="lm")
`geom_smooth()` using formula = 'y ~ x'
###Fit simple models to the data
#cost by age and break type modelcost_model <-lm(surgery_cost ~ age + break_type, data = break_data)summary(cost_model)
Call:
lm(formula = surgery_cost ~ age + break_type, data = break_data)
Residuals:
Min 1Q Median 3Q Max
-554.64 -150.89 -1.69 189.80 524.72
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2202.709 46.912 46.954 <2e-16 ***
age 5.856 3.172 1.846 0.0651 .
break_typecompound 12.296 23.519 0.523 0.6012
break_typegreenstick 24.524 23.512 1.043 0.2971
break_typespiral 16.426 23.525 0.698 0.4852
break_typetransverse 1.879 23.568 0.080 0.9365
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 257.5 on 1194 degrees of freedom
Multiple R-squared: 0.003977, Adjusted R-squared: -0.0001939
F-statistic: 0.9535 on 5 and 1194 DF, p-value: 0.4453
#time by age modeltime_model <-lm(surgery_time ~ age, data = break_data)summary(time_model)
Call:
lm(formula = surgery_time ~ age, data = break_data)
Residuals:
Min 1Q Median 3Q Max
-17.1776 -3.2440 0.0192 3.1614 21.8880
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.4627 0.8666 14.382 <2e-16 ***
age -0.1344 0.0610 -2.204 0.0277 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.973 on 1198 degrees of freedom
Multiple R-squared: 0.004037, Adjusted R-squared: 0.003205
F-statistic: 4.856 on 1 and 1198 DF, p-value: 0.02774
#recovery rate by cost modelrecovery_model <-lm(recovery ~ surgery_cost + surgery_time, data = break_data)summary(recovery_model)
Call:
lm(formula = recovery ~ surgery_cost + surgery_time, data = break_data)
Residuals:
Min 1Q Median 3Q Max
-2.9103 -0.7416 -0.0134 0.7328 3.1936
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.045455 0.268072 18.821 <2e-16 ***
surgery_cost 0.005007 0.000114 43.902 <2e-16 ***
surgery_time -0.050294 0.005895 -8.531 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.016 on 1197 degrees of freedom
Multiple R-squared: 0.6222, Adjusted R-squared: 0.6215
F-statistic: 985.6 on 2 and 1197 DF, p-value: < 2.2e-16
#exploring another type of modelrecovery_glm <-glm(recovery ~ surgery_cost + surgery_time, family =gaussian(link ="identity"), data = break_data)summary(recovery_glm)
Call:
glm(formula = recovery ~ surgery_cost + surgery_time, family = gaussian(link = "identity"),
data = break_data)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.045455 0.268072 18.821 <2e-16 ***
surgery_cost 0.005007 0.000114 43.902 <2e-16 ***
surgery_time -0.050294 0.005895 -8.531 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 1.032061)
Null deviance: 3269.7 on 1199 degrees of freedom
Residual deviance: 1235.4 on 1197 degrees of freedom
AIC: 3448.3
Number of Fisher Scoring iterations: 2