/* ******************************************************************************* This code is useful for placing the output into an tables in an rtf file. Note that lines 464 and 465 are needed to complete this output process. The results of the macro runs on line 13 would be automatically stored as temp_output.rtf in the folder "designs" on the z drive. Change names to meet your needs. If you don't, the program still runs and you can save the output by going to the file command menu (top left). You will also be also be prompted at the end of the program if you want to open or save the file. The save option allows you to save it under any name in any folder. */ ods rtf file='z:\designs\temp_output.rtf' style=Journal ; /* ************************************************************************ Two Sample T-Test, Z-Test (corrected and uncorrected for unequal variance plus Wilcoxon Test performance by simulation By J. Shuster, University of Florida jshuster@biostat.ufl.edu Copying this Macro is permitted for teaching purposes or for personal use. ****************************************************************************** */ missing x X; %macro wilpower(n1, n2,alpha, reps, seed1,seed2,dc,y1,f1,g1,y2,f2,g2,y3,f3,g3,y4,f4,g4,y5,f5,g5, y6,f6,g6,y7,f7,g7,y8,f8,g8,y9,f9,g9,y10,f10,g10); /* This macro provides for a simulation of the actual power for two-sided p-values associated with the Wilcoxon Test, the T-test, and the Satterthwaite corrected Z-test and the normal approximations as cutoffs for the latter two. The user supplies the two sample sizes, the type I error rate (alpha), the number of replications, seed numbers, dc (discrete=0 and continuous=1) and the two cumulative distributions where a power calculation is needed. If the two distributions are the same, the macro will estimate the true Type I error for each of the three tests, whose intended type I error is alpha (say 0.05). ************************************************************************* Macro Usage: %wilpower(n1,n2,alpha, reps, seed1,seed2,dc, y1,f1,g1, y2,f2,g2, y3,f3,g3, y4,f4,g4, y5,f5,g5, y6,f6,g6, y7,f7,g7, y8,f8,g8, y9,f9,g9, y10,f10,g10); Note the convenience of putting the parameters up to the dc parameter on one line, and data yj (the values you observe), and fj and gj the cumulative distribution on succeeding lines. You have to put dots (missing values) for the last few y,f,g if your distribution is over fewer than 10 points. Here is an example: title 'Example 2.12 with n1=27 and n2=54'; %wilpower(27,54,.05,100000,6371,7357,1, -2,0,0, -1,0,.1, 0,.5,.3, 1,1,.6, 2,1,1, .,.,., .,.,., .,.,., .,.,., .,.,.); **************************************************************************** Input parameters: Must be 37 entries with 36 commas separating them. Missing values are denoted by dots (periods)but at least the first 13 fields must be non missing, and missing values can only be used for the last entries (a multiple of three up to 24). n1=sample size for the first group=27 n2=sample size for the second group=54 alpha=.05 (Intended Type I error rate=5%) reps=Number of simulations=100000 seed1=a large odd number to seed the randomization (<2**31)=6371 seed1=a large odd number to seed the randomization (<2**31)=7357 dc=0 if discrete or 1=continuous (see below) In this case 1=continuous fj,gj, and yj: The cumulative distributions, F and G, satisfy for group 1: F(yj)=P(Y<=yj)=fj and for group 2 G(yj)=P(Y<=yj)=gj . You can put in up to ten points and 0f1 and wf2 and wf3 and wf4 and wf5 and wf6 and wf7 and wf8 and wf9 and f9>. then y=yy9+(((w-f9)/(1-f9))*(yy10-yy9)); end; keep i1 x y alpha; output;end; end; data b2;set a; do i1=1 to reps; do i2=1 to n2; x=1; call ranuni(seed2,w); if dc=0 then do; y=yy10; if wg1 and wg2 and wg3 and wg4 and wg5 and wg6 and wg7 and wg8 and wg9 and g9>. then y=yy9+(((w-g9)/(1-g9))*(yy10-yy9)); end; keep i1 x y alpha; output; end; end; data b;set b b2; proc sort;by i1 x; data b;set b; proc rank ties=mean;;by i1;var y;ranks ry; proc means noprint;by i1 x;var y ry alpha; output out=new mean=y ry alpha std=sy sry sa n=ny nny na; data new;set new; data new0;set new;if x=0; rybar1=ry;ybar1=y; srybar1=sry;sybar1=sy; n1=ny; keep i1 n1 rybar1 ybar1 srybar1 sybar1 alpha; data new1;set new;if x=1; rybar2=ry;ybar2=y; srybar2=sry;sybar2=sy; n2=ny; keep i1 n2 rybar2 ybar2 srybar2 sybar2; data new;merge new0 new1;by i1; data new;set new; n=n1+n2; wt1=((sybar1**2)/n1); wt2=((sybar2**2)/n2); df2=((wt1+wt2)**2)/((wt1*wt1/(n1-1))+(wt2*wt2/(n2-1))); if df2=. then df2=1; sse=((sybar1**2)*(n1-1))+((sybar2**2)*(n2-1)); se=sse/(n-2); se=sqrt(se*((1/n1)+(1/n2))); if se<.00000001 then se=.00000001; t=abs(ybar1-ybar2)/se; sse_r=((srybar1**2)*(n1-1))+((srybar2**2)*(n2-1)); meanr=(n1*rybar1+n2*rybar2)/n; ss_tr=n1*((rybar1-meanr)**2)+n2*((rybar2-meanr)**2); ss_tot_r=ss_tr+sse_r; var=ss_tot_r/(n-1); ser=sqrt(var*n2/(n1*n)); if ser<.00000001 then ser=.00000001; z=abs(rybar1-meanr)/ser; seu=sqrt(wt1+wt2); if seu<.00000001 then seu=.00000001; zz=abs(ybar1-ybar2)/seu; countw=0;countt=0;counts=0;counttz=0;countzu=0; if z>-probit(alpha/2) then countw=1; if t>-tinv(alpha/2,n-2) then countt=1; if zz>-tinv(alpha/2,df2) then counts=1; if t>-probit(alpha/2) then counttz=1; if zz>-probit(alpha/2) then countzu=1; proc means maxdec=5 mean n stderr noprint; var countw countt counts counttz countzu; output out=d mean=p_wil p_t p_z p_t_normal p_zu_normal stderr=se_wil se_t se_z se_t_normal se_zu_normal; data d;set d;xxx=1;proc sort;by xxx;; data a;set a;xxx=1;proc sort;by xxx; data a;merge a d;by xxx; if p_t<.99 then t_factor80=((.84-probit(alpha/2))/(probit(p_zu_normal)-probit(alpha/2)))**2; if p_wil<.99 then w_factor80=((.84-probit(alpha/2))/(probit(p_wil)-probit(alpha/2)))**2; if p_t<.99 then t_factor90=((1.282-probit(alpha/2))/(probit(p_zu_normal)-probit(alpha/2)))**2; if p_wil<.99 then w_factor90=((1.282-probit(alpha/2))/(probit(p_wil)-probit(alpha/2)))**2; n1t_80=int(.5+n1*t_factor80); n1t_90=int(.5+n1*t_factor90); n1w_80=int(.5+n1*w_factor80); n1w_90=int(.5+n1*w_factor90); n2t_80=int(.5+n2*t_factor80); n2t_90=int(.5+n2*t_factor90); n2w_80=int(.5+n2*w_factor80); n2w_90=int(.5+n2*w_factor90); if abs(mean-meana)>.00001*sd_null then do; theta=n1/(n1+n2); nt80=((.84-probit(alpha/2))/(mean-meana))**2; qw=((sd_null**2)/theta)+((sd_alt**2)/(1-theta)); nt80=int(nt80*qw+.5); nt90=((1.282-probit(alpha/2))/(mean-meana))**2; nt90=int(nt90*qw+.5); nt801=int(.5+theta*nt80);nt802=nt80-nt801; nt901=int(.5+theta*nt90);nt902=nt90-nt901; end; file print; put // @10 'Power Simulation of Wilcoxon, T-test, Satterthwaite Corrected T'; if dc=0 then put / @10 'Cumulative Discrete Distributions'; if dc=1 then put / @10 'Cumulative Distributions at selected points-rest by linear interpolation'; put / @10 'P(<=' y1 best5. @20 ')=' @23 f1 z5.4 ' vs ' @32 g1 z5.4 / @10 'P(<=' y2 best5. @20 ')=' @23 f2 z5.4 ' vs ' @32 g2 z5.4 ; if f3>. then put @10 'P(<=' y3 best5. @20 ')=' @23 f3 z5.4 ' vs ' @32 g3 z5.4 ; if f4>. then put @10 'P(<=' y4 best5. @20 ')=' @23 f4 z5.4 ' vs ' @32 g4 z5.4 ; if f5>. then put @10 'P(<=' y5 best5. @20 ')=' @23 f5 z5.4 ' vs ' @32 g5 z5.4; if f6>. then put @10 'P(<=' y6 best5. @20 ')=' @23 f6 z5.4 ' vs ' @32 g6 z5.4 ; if f7>. then put @10 'P(<=' y7 best5. @20 ')=' @23 f7 z5.4 ' vs ' @32 g7 z5.4 ; if f8>. then put @10 'P(<=' y8 best5. @20 ')=' @23 f8 z5.4 ' vs ' @32 g8 z5.4; if f9>. then put @10 'P(<=' y9 best5. @20 ')=' @23 f9 z5.4 ' vs ' @32 g9 z5.4 ; if f10>. then put @10 'P(<=' y10 best5. @20 ')=' @23 f10 z5.4 ' vs ' @32 g10 z5.4 /; put / @10 'Mean under Null=' mean best6. @36 'Mean under Alternative=' meana best6.; put / @10 'SD under Null=' sd_null best6. @36 'SD under Alternative=' sd_alt best6.; q=n1+n2; put // @10 'Total sample size=' q ' Group 1 N=' n1 ' Group 2 N=' n2 // @10 'Based on ' reps ' simulations with seeds=' seed1 ' and ' seed2 /// @10 'Nominal Alpha=' alpha ' Two-sided' // @10 'Statistical Test' @30 'Estimated Power' @50 'Standard Error' / @10 'Rank Test' / @10 'Wilcoxon' @30 p_wil @50 se_wil best7. // @10 'Parametric Tests' /@10 'Pooled Variance' / @10 'T-test' @30 p_t @50 se_t best7. @60 '(T Cutpoint)' / @10 'T-test' @30 p_t_normal @50 se_t_normal best7. @60 '(Normal Cutpoint)' //@10 'Unpooled Variance' / @10 'Z,Unpooled Var' @30 p_z @50 se_z best7. @60 '(Satterthwaite)' / @10 'Z,Unpooled Var' @30 p_zu_normal @50 se_zu_normal best7. @60 '(Normal Cutpoint)'; if abs(mean-meana)>.01*sd_alt then do; put _page_; put // @10 'Sample Size Projections for 80% and 90% power'; put //@10 'Method=Brownian Motion'; put //@10 'Actual Sample Sizes=' n1 'and ' n2; put //@10 'Type I error=' alpha ' Two-sided'; put / @10 'Test' @25 '80% Power' @55 '90% Power'; put / @10 'Wilcoxon' @25 'n1=' n1w_80 ' and n2=' n2w_80 @55 'n1=' n1w_90 ' and n2=' n2w_90 ; put / @10 'T-test' @25 'n1=' n1t_80 ' and n2=' n2t_80 @55 'n1=' n1t_90 ' and n2=' n2t_90 ; if p_t<.20 then put // @10 'Warning: Power for t is too low for accurate t-test Brownian Motion projection' // @10 'Try larger sample sizes, say double, to get new projection' // @10 'Total T-requirement by confidence interval inversion=' nt80 'per group for 80% power' /@10 nt801 best5. ' to group 1 and ' @35 nt802 best5. ' to group 2' // @10 'Total T-requirement by confidence interval inversion=' nt90 'per group for 90% power' /@10 nt901 best5. ' to group 1 and ' @35 nt902 best5. ' to group 2'; if p_wil<.20 then put // @10 'Warning: Power is too low for accurate Wilcoxon Brownian Motion projection' // @10 'Try larger sample sizes, say double, to get new projection'; end; time2=datetime();time1=time2-time1; put // @30 'Total Run Time=' time1 7.2 ' Seconds'; run; %mend; title 'Example 2.12 with n1=27 and n2=54'; %wilpower(27,54,.05,100000,6371,7357,1, -2,0,0, -1,0,.1, 0,.5,.3, 1,1,.6, 2,1,1, .,.,., .,.,., .,.,., .,.,., .,.,.); ods rtf close; run;