version 9.0
/* Name: forest plot bcr by surgeon.do
Description: Forest plot of adjusted 5-year predicted probability of freedom from BCR by surgeon */
* change directory
cd "o:\Outcomes\Andrew\_UROLOGY Biostatistics\_Submitted\Accepted for publication\Bianco surgeons and recurrence\deidentify data for publication\"
* load deidentified data set for analysis
use "master learning curve data set deidentified.dta", clear
* because of large standard errors, we will not plot estimates for the following surgeons:
* a) surgeons with fewer than 40 total cases
* b) surgeons with fewer than 20 non-neoadjuvant cases
* for simplicity, these surgeons will be coded as a single reference group
* recode surgeon variable for surgeons with fewer than 40 total cases
g newanonsurgeonid = anonsurgeonid
gsort anonsurgeonid -experience
by anonsurgeonid: replace newanonsurgeonid = 0 if experience[1] < 40
* drop patients who received neoadjuvant therapy
quietly drop if nht==1
* recode surgeon variable for surgeons with fewer than 20 non-neoadjuvant cases
by anonsurgeonid: replace newanonsurgeonid = 0 if _N < 20
* create dummy variables for pathologic gleason, which was categorized as <=5 / 6 / 7 / 8 / >=9
g pgrade5 = pathgleason<=5
g pgrade6 = pathgleason==6
g pgrade7 = pathgleason==7
g pgrade8 = pathgleason==8
g pgrade9 = pathgleason>=9
* create variables used in the model (will be manipulated later to plug in the mean values for all covariates)
foreach v of varlist surgyear psa ece svi lni pgrade5 pgrade6 pgrade7 pgrade8 pgrade9 experience {
quietly g model`v' = `v'
* obtain mean values of the covariates
quietly sum model`v'
scalar mean`v' = r(mean)
}
* create knots for surgeon experience (will be put at the quartiles)
quietly centile modelexperience, c(25 50 75)
local k1 = r(c_1)
local k2 = r(c_2)
local k3 = r(c_3)
* create restricted cubic splines
spbase modelexperience, gen(modelspexperience) knots(`k1' `k2' `k3')
* stset data
stset ttlastfollowbcr, f(bcr)
* multivariable model, with survival times assumed to follow a log-logistic distribution;
* covariates are year of surgery, PSA, ECE, SVI, LNI, path Gleason, and surgeon experience
* surgeon is entered as a fixed effect so that we can obtain an estimate for each surgeon
streg modelsurgyear modelpsa modelece modelsvi modellni modelpgrade6 modelpgrade7 modelpgrade8 modelpgrade9 ///
modelexperience modelspexperience1 modelspexperience2 modelspexperience3 i.newanonsurgeonid, dist(llog) nolog
* calculate adjusted 5-year predicted recurrence-free probability
* save out parameter from model (estimated by data), which is a constant for all patients
scalar gamma = e(gamma)
* set covariates to mean values
foreach v of varlist surgyear psa ece svi lni pgrade5 pgrade6 pgrade7 pgrade8 pgrade9 {
quietly replace model`v' = scalar(mean`v')
}
* set surgeon experience to 40, which was chosen as a clinically relevant value
quietly replace modelexperience = 40
* also set spline variables corresponding to surgeon experience of 40
forvalues i=1(1)3 {
quietly sum modelspexperience`i' if experience==40
quietly replace modelspexperience`i' = r(mean)
}
* calculate predicted survival probability; obtain prediction for 5 years, i.e. 60 months
replace _t = 60
predict s60, surv
* to use meta-analytic techniques to obtain a combined probability across all surgeons,
* we need an estimate and standard error of the probability. this can be obtained by considering
* the predicted survival probably as a binomial proportion based on the surgoen's number of cases on the data set
* drop surgeons who will not be plotted because they had < 40 total cases or < 20 non-neoadjuvant cases
drop if newanonsurgeonid==0
* initialize variables that will store the final proportion, standard error, and number of cases
g finalp = .
g ncase = .
* for simplicity, create a numeric variable surgeon
egen nsurgeon = group(newanonsurgeonid)
quietly sum nsurgeon
local nsurgeon = r(max)
forvalues i=1(1)`nsurgeon' {
quietly sum s60 if nsurgeon==`i'
quietly cii r(N) r(mean)
quietly replace finalp = r(mean) if nsurgeon==`i'
quietly replace ncase = r(N) if nsurgeon==`i'
}
* keep only 1 row per surgeon (the values of interest are constant by surgeon)
sort nsurgeon
by nsurgeon: keep if _n==1
keep nsurgeon final* ncase
* randomly order the surgeons on the data set, so that it is not known where each surgeon lies
drop nsurgeon
g u = uniform()
sort u
g nsurgeon = _n
drop u
* since we are using meta-analytic commands, which will not recognize the values as proportions,
* we will use a transformation, specifically, the Freeman-Tukey arcsin transform
g n1 = round(ncase*finalp)
g pTransformed = asin(sqrt(n1/(ncase+1))) + asin(sqrt((n1+1)/(ncase+1)))
g SEpTransf = sqrt(1/(ncase+1))
* perform meta-analysis to obtain combined probability across all surgeons
quietly metan pTransformed SEpTransf, nograph
* back-transform to obtain combined probability across all surgeons and for each surgeon
local combined = (sin(r(ES) / 2))^2
g finalp_adj = (sin(pTransformed / 2))^2
g finalp_lb = (sin((pTransformed - invnorm(0.975)*SEpTransf)/ 2))^2
g finalp_ub = (sin((pTransformed + invnorm(0.975)*SEpTransf)/ 2))^2
* cut off values less than 60% so that there is less white space on the graph
local vars finalp_adj finalp_lb finalp_ub
foreach v of local vars {
quietly replace `v' = 0.6 if `v' < .6
}
* forest plot
twoway(rcap finalp_lb finalp_ub nsurgeon, horizontal lcolor(gs0)) || ///
(scatter nsurgeon finalp_adj, msymbol(square) mcolor(gs0) msize(small)), ///
legend(off) scheme(s1mono) ///
xtitle("5-year probability of freedom from BCR", margin(medium)) ///
xlabel(.6 "60%" .7 "70%" .8 "80%" 0.9 "90%" 1 "100%") xline(`combined', lcolor(gs0)) ///
ytitle("Surgeon", margin(medium)) ylabel(1(1)`nsurgeon', labsize(vsmall) nogrid angle(horizontal))