/* ******************************************************************************* This code is useful for placing the output into an tables in an rtf file. Note that lines 394 and 395 are needed to complete this output process. The results of the macro runs on line 12 would be automatically stored as temp_output.rtf in the folder "designs" on the z drive. Change names to meet your needs. If you don't, the program still runs and you can save the output by going to the file command menu (top left). You will also be also be prompted at the end of the program if you want to open or save the file. The save option allows you to save it under any name in any folder. */ ods rtf file='z:\designs\temp_output.rtf' style=Journal ; /* **************************************************************************** One Sample T-Test and Wilcoxon Sign Rank Test performance by simulation By J. Shuster, University of Florida jshuster@biostat.ufl.edu Copying this Macro is permitted for teaching purposes or for personal use. ************************************************************************** */ %macro srpower(n,center,alpha, reps, seed,dc,y1,f1,y2,f2,y3,f3,y4,f4,y5,f5, y6,f6,y7,f7,y8,f8,y9,f9,y10,f10); /* This macro provides for a simulation of the actual power for two-sided p-values associated with the one sample Wilcoxon Sign-Rank Test, the T-test, and its normal approximation The user supplies the sample sizes, null hypothesized center (mean),the type I error rate (alpha), the cumulative distribution for which a power calculation is needed. If the hypothesized center is the mean of the distribution , the macro will estimate the true Type I error for each of the three tests, whose intended type I error is alpha (say 0.05). Under non-null conditions, the Macro also projects the sample sided needed to obtain 80% and 90% power, using a Brownian Motion projection. ********************************************************************************* Macro Usage: %srpower(n1,center,alpha, reps, seed,dc, y1,f1, y2,f2, y3,f3, y4,f4, y5,f5, y6,f6, y7,f7, y8,f8, y9,f9, y10,f10); Note the convenience of putting the parameters up to the dc parameter on one line, and data yj (the values you observe) and fj the cumulative distribution on succeeding lines. You have to put dots (missing values) for the last few y,f if your distribution is over fewer than 10 points. Here is an example: (1.3 with n=20 from the paper) %srpower(20,0,.05, 100000, 1711,0, -2,.2, -1,.4, 0,.6, 1,.8, 2,1, .,., .,., .,., .,., .,.); *************************************************************************** Input parameters: Must be 26 entries with 25 commas separating them. Missing values are denoted by dots (periods)but at least the first 10 must be non-missing, and missing values can only be used for the last entries (an even number up to 16). n=sample size center=hypothesize mean (or measure of central tendency)=0 alpha=.05 (Intended Type I error rate=5%) reps=Number of simulations=100000 seed=a large odd number to seed the randomization (<2**31)=1711 dc=0 if discrete or 1=continuous (see below) In this case 0=discrete yj and fj: The cumulative distribution, F satisfies F(yj)=P(Y<=yj)=fj. You can put in up to ten points and 00, then that mass is uniformly distributed between 0 ad Y1. Warning: If you have a negative Y1, and a continuous distribution, this will not work. However, if the lowest possible y value is specified with a a cumulative of zero, then you can handle negative values. %srpower(20,0,.05, 100000, 1711,0, -2.5,0 -1.5,.2, -.5,.4, .5,.6, 1.5,.8, 2.5,1, .,., .,., .,., .,.); This would give you a uniform distribution between -2.5 and +2.5. ****************************************************************************** Output: It estimates the true probability that the Sign Rank Test, one sample t-test by the t-distribution and normal distribution in absolute exceeds the cutoff for the test at the nominal (tabulated) value. Standard errors for these estimates are also provided. The population means and standard deviations are given as well. It also provides projections on the needed sample size to obtain 80% and 90% power at P=alpha (user supplied), two-sided, for the Sign Rank and T-tests. Note: If the means under the null and alternate are the same, the asymptotic power calculations are not done. The output also summarizes the input information. ***************************************************************************** Binary outcome application: %srpower(42,.8,.05, 100000, 1711,0, 0,.4, 1,1, .,., .,., .,., .,., .,., .,., .,., .,.); The actual mean is 0.6 under the alternate (0*.4+1*.6), and the hypothesized mean is 0.8. (you are testing for a binomial response rate of 80%, when the true response rate is 60%. The program gives you the power of the test. ***************************************************************************** */ data a; time1=datetime(); options cleanup; n=&n;center=¢er;alpha=αreps=&reps;seed=&seed;dc=&dc; f1=&f1;y1=&y1; f2=&f2;y2=&y2; f3=&f3;y3=&y3; f4=&f4;y4=&y4; f5=&f5;y5=&y5; f6=&f6;y6=&y6; f7=&f7;y7=&y7; f8=&f8;y8=&y8; f9=&f9;y9=&y9; f10=&f10;y10=&y10; yy1=y1;yy2=y2; yy3=max(y3,yy2); yy4=max(y4,yy3); yy5=max(y5,yy4); yy6=max(y6,yy5); yy7=max(y7,yy6); yy8=max(y8,yy7); yy9=max(y9,yy8); yy10=max(y10,yy9); array f f1-f10; array ff ff1-ff10; do over ff; ff=f;if f=. then ff=1; end; if dc=0 then do; mean=ff1*yy1+(ff2-ff1)*yy2 +(ff3-ff2)*yy3+(ff4-ff3)*yy4 +(ff5-ff4)*yy5+(ff6-ff5)*yy6 +(ff7-ff6)*yy7 +(ff8-ff7)*yy8 +(ff9-ff8)*yy9 +(ff10-ff9)*yy10; e2=ff1*yy1*yy1+(ff2-ff1)*yy2*yy2 +(ff3-ff2)*yy3*yy3+(ff4-ff3)*yy4*yy4 +(ff5-ff4)*yy5*yy5+(ff6-ff5)*yy6*yy6 +(ff7-ff6)*yy7*yy7 +(ff8-ff7)*yy8*yy8 +(ff9-ff8)*yy9*yy9 +(ff10-ff9)*yy10*yy10; end; if dc=1 then do; mean=.5*ff1*yy1+.5*(yy2+yy1)*(ff2-ff1)+.5*(yy3+yy2)*(ff3-ff2)+.5*(yy4+yy3)*(ff4-ff3) +.5*(yy5+yy4)*(ff5-ff4)+.5*(yy6+yy5)*(ff6-ff5)+.5*(yy7+yy6)*(ff7-ff6)+.5*(yy8+yy7)*(ff8-ff7) +.5*(yy9+yy8)*(ff9-ff8)+.5*(yy10+yy9)*(ff10-ff9); e2= ((ff1*yy1*yy1)/3)+ ((yy2*yy2+yy2*yy1+yy1*yy1)/3)*(ff2-ff1)+ ((yy3*yy3+yy3*yy2+yy2*yy2)/3)*(ff3-ff2)+ ((yy4*yy4+yy4*yy3+yy3*yy3)/3)*(ff4-ff3)+ ((yy5*yy5+yy5*yy4+yy4*yy4)/3)*(ff5-ff4)+ ((yy6*yy6+yy6*yy5+yy5*yy5)/3)*(ff6-ff5)+ ((yy7*yy7+yy7*yy6+yy6*yy6)/3)*(ff7-ff6)+ ((yy8*yy8+yy8*yy7+yy7*yy7)/3)*(ff8-ff7)+ ((yy9*yy9+yy9*yy8+yy8*yy8)/3)*(ff9-ff8)+ ((yy10*yy10+yy10*yy9+yy9*yy9)/3)*(ff10-ff9); end; sd_alt=sqrt(e2-mean*mean); data b;set a; do i1=1 to reps; do i2=1 to n; call ranuni(seed,w); if dc=0 then do; y=yy10; if wf1 and wf2 and wf3 and wf4 and wf5 and wf6 and wf7 and wf8 and wf9 and f9>. then y=yy9+(((w-f9)/(1-f9))*(yy10-yy9)); end; keep i1 y center; output;end; end; data b;set b; proc sort;by i1; data b;set b; proc sort;by i1; data b;set b;y=y-center; ay=abs(y); PROC rank ties=mean out=b;by i1;var ay;ranks ry; data b;set b; if y<0 then ry=-ry; if y=0 then ry=0; ry2=ry*ry; keep i1 y ry ry2; proc means noprint;by i1;var y ry ry2; output out=new1 mean=y ry ry2 std=sy sry sry2 n=ny nry nry2; data new1;set new1;keep y ry sy ry2 ny ; data new1;set new1;xxx=1; if sy<.00000001 then sy=.000000001; if ry2<.00000001 then ry2=.000000001; t=abs(y*sqrt(ny)/sy); sry=sqrt(ry2/(ny-1)); tr=abs(ry/sry); proc sort;by xxx; data a;set a;xxx=1; proc sort;by xxx; data ax;set a;keep xxx alpha; data new1;merge new1 ax;by xxx; zc=probit(1-.5*alpha); tc=tinv(1-.5*alpha,ny-1); sigt=0;if t>tc then sigt=1; sigs=0;if tr>zc then sigs=1; sigz=0;if t>zc then sigz=1; proc means mean stderr noprint;by xxx;var sigt sigz sigs; output out=new2 mean=p_t p_z p_sr stderr=se_t se_z se_sr; data new2;merge new2 a;by xxx; data new2;set new2; if p_t<.99 then t_factor80=((.84-probit(alpha/2))/(probit(p_t)-probit(alpha/2)))**2; if p_sr<.99 then w_factor80=((.84-probit(alpha/2))/(probit(p_sr)-probit(alpha/2)))**2; if p_t<.99 then t_factor90=((1.282-probit(alpha/2))/(probit(p_t)-probit(alpha/2)))**2; if p_sr<.99 then w_factor90=((1.282-probit(alpha/2))/(probit(p_sr)-probit(alpha/2)))**2; nt_80=int(.5+n*t_factor80); nt_90=int(.5+n*t_factor90); nw_80=int(.5+n*w_factor80); nw_90=int(.5+n*w_factor90); if abs(mean-center)>.00001*sd_alt then do; ntt80=((.84-probit(alpha/2))/(mean-center))**2; ntt80=int(ntt80*(sd_alt**2)+.5); ntt90=((1.282-probit(alpha/2))/(mean-center))**2; ntt90=int(ntt90*(sd_alt**2)+.5); end; run; data new2;set new2; file print; put // @10 'Power Simulation of One sample Sign rank and T-test'; if dc=0 then put / @10 'Cumulative Discrete Distributions'; if dc=1 then put / @10 'Cumulative Distributions at selected points-rest by linear interpolation'; put / @10 'P(<=' y1 best5. @20 ')=' @23 f1 z5.4 / @10 'P(<=' y2 best5. @20 ')=' @23 f2 z5.4 ; if f3>. then put @10 'P(<=' y3 best5. @20 ')=' @23 f3 z5.4 ; if f4>. then put @10 'P(<=' y4 best5. @20 ')=' @23 f4 z5.4 ; if f5>. then put @10 'P(<=' y5 best5. @20 ')=' @23 f5 z5.4 ; if f6>. then put @10 'P(<=' y6 best5. @20 ')=' @23 f6 z5.4 ; if f7>. then put @10 'P(<=' y7 best5. @20 ')=' @23 f7 z5.4 ; if f8>. then put @10 'P(<=' y8 best5. @20 ')=' @23 f8 z5.4 ; if f9>. then put @10 'P(<=' y9 best5. @20 ')=' @23 f9 z5.4 ; if f10>. then put @10 'P(<=' y10 best5. @20 ')=' @23 f10 z5.4 /; put @10 'Hypothesized Center=' center best6. @42 'Mean under Alternative=' mean best6.; put / @10 'SD under Alt=' sd_alt best6. ; put // @10 'Total sample size=' n // @10 'Based on ' reps ' simulations with seed=' seed // @10 'Nominal Alpha=' alpha ' Two-sided' // @10 'Statistical Test' @30 'Estimated Power' @50 'Standard Error' /@10 'Rank Method' / @10 'Wilcoxon Sign Rank' @30 p_sr @50 se_sr best7. //@10 'Parametric Method' / @10 'T-test' @30 p_t @50 se_t best7. @60 '(T-Cutoff)' / @10 'T-test' @30 p_z @50 se_z best7. @60 '(Normal-Cutoff)'; if abs(mean-center)>.01*sd_alt then do; put _page_; put / @10 'Sample Size Projections for 80% and 90% power'; put /@10 'Method=Brownian Motion'; put /@10 'Actual Sample Size=' n; put /@10 'Type I error=' alpha ' Two-sided'; put / @10 'Test' @25 '80% Power' @55 '90% Power'; put / @10 'Sign-Rank' @25 'n=' nw_80 @55 'n=' nw_90 ; put / @10 'T-test' @25 'n=' nt_80 @55 'n=' nt_90 ; if p_t<.20 then put / @10 'Warning: Power for t is too low for accurate t-test Brownian Motion projection' / @10 'Try larger sample size, say double, to get new projection' / @10 'T-requirement by confidence interval inversion n=' ntt80 'for 80% power' / @10 'T-requirement ++by confidence interval inversion n=' ntt90 ' for 90% power'; if p_sr<.20 then put // @10 'Warning: Power for Sign-rank is too low for accurate Brownian Motion projection' / @10 'Try larger sample sizes, say double, to get new projection'; end; time2=datetime();time1=time2-time1; put // @30 'Total Run Time=' time1 7.2 ' Seconds'; run; %mend; title 'Example 1.3 with n=20'; %srpower(20,0,.05, 100000, 1711,0, -2,.2, -1,.4, 0,.6, 1,.8, 2,1, .,., .,., .,., .,., .,.); ods rtf close; run;