version 9.0
/* Name: bootstrap ci for difference in 10 vs 250 bcr learning curve.do
Description: use bootstrap resampling to construct a 95% confidence interval for the difference in adjusted
5-year probability of recurrence for a patient treated by a surgeon with 10 vs 250 prior cases */
* change directory
cd "o:\Outcomes\Andrew\_UROLOGY Biostatistics\_Submitted\Accepted for publication\Bianco surgeons and recurrence\deidentify data for publication\"
* store the code for the primary analysis in a program, which will be invoked to be run for each bootstrap sample
capture program drop BOOTSTRAPCI
program BOOTSTRAPCI, rclass
preserve
* create dummy variables for pathologic gleason, which was categorized as <=5 / 6 / 7 / 8 / >=9
g pgrade5 = pathgleason<=5
g pgrade6 = pathgleason==6
g pgrade7 = pathgleason==7
g pgrade8 = pathgleason==8
g pgrade9 = pathgleason>=9
* create variables used in the model (will be manipulated later to plug in the mean values for all covariates)
foreach v of varlist surgyear psa ece svi lni pgrade5 pgrade6 pgrade7 pgrade8 pgrade9 {
quietly g model`v' = `v'
* obtain mean values of the covariates
quietly sum model`v'
scalar mean`v' = r(mean)
}
* create restricted cubic splines for surgeon experience (knots will be put at the quartiles)
quietly centile experience, c(25 50 75)
local k1 = r(c_1)
local k2 = r(c_2)
local k3 = r(c_3)
spbase experience, gen(spexperience) knots(`k1' `k2' `k3')
* save out min and max so we can calculate splines at specified values, i.e. `num' and 250 prior cases
quietly sum experience
local k0 = r(min)
local kN = r(max)
* stset data
stset ttlastfollowbcr, f(bcr)
* multivariable model, with survival times assumed to follow a log-logistic distribution;
* covariates are year of surgery, PSA, ECE, SVI, LNI, and path Gleason
* specify cluster option using anonsurgeonid, since patients treated by the same surgeon are not independent
streg modelsurgyear modelpsa modelece modelsvi modellni modelpgrade6 modelpgrade7 modelpgrade8 modelpgrade9 ///
experience spexperience1 spexperience2 spexperience3, dist(llog) cluster(anonsurgeonid) nolog
* calculate adjusted 5-year predicted recurrence-free probability
* set covariates to mean values
foreach v of varlist surgyear psa ece svi lni pgrade5 pgrade6 pgrade7 pgrade8 pgrade9 {
quietly replace model`v' = scalar(mean`v')
}
* obtain prediction for 5 years, i.e. 60 months
replace _t = 60
* get prediction at `num' and 250 prior cases
foreach num of numlist 10 250 {
* set experience to `num', and calculate corresponding spline values
quietly {
replace experience = `num'
replace spexperience1 = - (`kN' - `k1') / (`kN' - `k0') * (`num' - `k0') ^ 3
replace spexperience1 = spexperience1 + (`num' - `k1') ^ 3 if `num' > `k1'
replace spexperience2 = - (`kN' - `k2') / (`kN' - `k0') * (`num' - `k0') ^ 3
replace spexperience2 = spexperience2 + (`num' - `k2') ^ 3 if `num' > `k2'
replace spexperience3 = - (`kN' - `k3') / (`kN' - `k0') * (`num' - `k0') ^ 3
replace spexperience3 = spexperience3 + (`num' - `k3') ^ 3 if `num' > `k3'
* obtain adjusted survival probability
predict s, s
* obtain adjusted probability of recurrence
g f`num'= 100 - s * 100
drop s
}
}
* return the results of the program as scalars
quietly sum f10
local a = r(mean)
quietly sum f250
local b = r(mean)
return scalar delta = `a' - `b'
restore
end
* load deidentified data set for analysis
use "master learning curve data set deidentified.dta", clear
* drop patients who received neoadjuvant therapy
quietly drop if nht==1
* here is where you would set seed to replicate results
* invoke the bootstrap command, which will take a bootstrap sample with replacement from the data set
* in memory, run the program, repeat 1000 times, and save the results
bootstrap delta=r(delta), reps(1000) ///
saving("output bootstrap ci for difference 10 vs 250 learning curve.dta", replace): BOOTSTRAPCI
/* Note: The bootstrap command above was used to generate the confidence intervals that were reported in the paper.
We noticed after publication that the bootstrap command should have been specified with the cluster option, to be consistent
with the rest of our analyses, which accounted for the data structure where patients treated by the same surgeon
form non-independent clusters. The bootstrap command given below is the appropriate command, with the cluster option specified.
bootstrap delta=r(delta), reps(1000) cluster(anonsurgeonid) ///
saving("output bootstrap ci for difference 10 vs 250 learning curve.dta", replace): BOOTSTRAPCI */
* load bootstrap output
use "output bootstrap ci for difference 10 vs 250 learning curve.dta", clear
* the 95% confidence interval is obtained from the 2.5th and 97.5th centiles of the distribution of differences
quietly centile delta, c(2.5 97.5)
disp "95% confidence interval for 10 vs 250: " string(r(c_1), "%9.1f") "% to " string(r(c_2), "%9.1f") "%"