/* *********************************************************************
   Wilcoxon Sample Size

   Two input datasets are required:

   Data x (one observation):  Input in the following order:
        alpha = type I error
        power = power of the test
        side  = 1 for one-sided or 2 for two-sided

   Data y:  Data are input as two columns, giving the marginal
        probabilities f and g for the two distributions.  Entries are
        assumed to be in order (lowest value of x to highest).  If the
        probabilities sum to less than one, the table is automatically
        augmented with one more row to fill the void.  If the
        probabilities add to something greater than one, the last
        row(s) will automatically be capped to sum to one and a
        warning is issued.

   Output is as follows:
        N = sample size needed for each treatment
        The asymptotic variance of the Wilcoxon test, per Lehmann's
        book, Chapter 2.3.

   Lehmann, EL (1975). Nonparametrics: Statistical Methods Based on
   Ranks. Holden-Day, San Francisco.
   ********************************************************************* */
data time1;time1=time();sid=1;output;proc sort;by sid;

data x;input alpha power side;t=1;cards;
.05 .8 2
;
proc sort;by t;
/* */
data x;set x;by t;if first.t;
if alpha>1 then alpha=.01*alpha;   /* allow alpha entered as a percent */
za=-probit(alpha/side);            /* upper-tail z-value for alpha/side */
zb=probit(power);                  /* z-value corresponding to the power */

data y;input f g;
cards;
.33333 .5
.33333 .33333
;
data y2;set y;t=1;k=_n_;proc sort;by t k;

data y;set y;
/* ************************************************
   Here is where we calculate the effect size (population Kendall's
   tau) and the variance of the Mann-Whitney-Wilcoxon statistic under
   the parametrization above.  We use the classical null variance for
   the simulations.
   ************************************************ */
length warning $40.;
if p=. then p=0;
if q=. then q=0;
if c=. then c=0;
if d=. then d=0;
p=p+f;q=q+g;          /* cumulative distributions for the two groups */
c=c+f*(1-q);
d=d+g*(1-p);
e=c-d;                /* effect size */
retain p q c d;
warning='None ';
if p>1.001 or q>1.001 then warning='Fails: Cum Dist>1';

data yy;set y;
t=1;k=_n_;
proc sort;by t k;

/* if the probabilities sum to less than one, add a final row for the
   remaining mass */
data w1;set yy;by t;if last.t;
f=1-p;g=1-q;k=k+1;p=1;q=1;
if f>.001 or g>.001 then output;

data yt;set yy w1;proc sort;by k;
data w1;set yt;t=1;proc sort;by t k;
data w1;set w1;by t;if last.t;nrow=k;drop k;

data yi;set yt;i=k;fi=f;gi=g;keep i fi gi;
proc sort;by i;
data yj;set yt;j=k;fj=f;gj=g;keep j fj gj;
proc sort;by j;
data yk;set yt;fk=f;gk=g;keep k fk gk;
proc sort;by k;

/* all (i,j,k) triples of rows, used to accumulate the variance terms */
data wilc;set w1;
do i=1 to nrow;
 do j=1 to nrow;
  do k=1 to nrow;
   output;
  end;
 end;
end;
data wilc;set wilc;keep i j k;
proc sort;by i;
data wilc;merge wilc yi;by i;
proc sort;by j;
data wilc;merge wilc yj;by j;
proc sort;by k;
data wilc;merge wilc yk;by k;

/* contributions to the asymptotic variance: positive when rows i and j
   fall on the same side of row k, negative when they fall on opposite
   sides (Lehmann, Chapter 2.3) */
data wilc;set wilc;
q=0;r=0;
if (i>k+.01 and j>k+.01) or (i<k-.01 and j<k-.01) then do;
 q=fi*fj*gk;r=gi*gj*fk;
end;
if (i>k+.01 and j<k-.01) or (i<k-.01 and j>k+.01) then do;
 q=-fi*fj*gk;r=-gi*gj*fk;
end;
qplusr=q+r;
proc means noprint n sum;var q r qplusr;
output out=new sum=q r qplusr;

data new;set new;t=1;proc sort;by t;
data new;update new w1;by t;
avar=qplusr-2*e**2;               /* asymptotic variance */
data new;merge new x;by t;
drop f g;

data y2;merge y2 new;by t;
n=int(.5+avar*(((za+zb)/e)**2));  /* sample size per group, rounded */

data y2;set y2;
data y2;set y2;
k=_n_;proc sort;by t k;

data y2;set y2;if sumf=. then sumf=0;
if sumg=. then sumg=0;
sumf=sumf+f;
sumg=sumg+g;
retain sumf sumg;

data extra;set y2;by t;if last.t;
if sumf<.999 then f=1-sumf;
if sumg<.999 then g=1-sumg;
k=k+1;
kmax=1;if sumf>1.01 or sumg>1.01 then warning='Sum of Probabilities Exceeds 1.0';

data y2;set y2 extra;proc sort;by k;
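/* ************************************************
   For reference: the per-group sample size computed above is

        n = round( avar * ((za + zb) / e)**2 )

   where za and zb are the z-values corresponding to alpha/side and to
   the power, e = c - d is the effect size, and avar is the asymptotic
   variance (Lehmann, Chapter 2.3).  The final step below prints the
   sample size and echoes the input probabilities.
   ************************************************ */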
data y2;set y2;
if sumf>.9999 and sumg>.9999 and kmax=. then delete;
if sumf>.9999 and sumg>.9999 and kmax=1 then k=k-1;
file print;
if k=1 then do;
 put @10 'Wilcoxon Sample Size Program';
 put @10 'Alpha=' alpha ' ' side '-sided' @35 'Power=' power;
 put @10 'Sample Size Required=' n ' Per Group' //;
 put @15 'Ordinal Value' @34 'Prob for Control' @54 'Prob for Exp';
end;
put @15 k @34 f @54 g;
if kmax=1 then put // @10 'Warning=' warning;
run;
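/* *********************************************************************
   Example of use (hypothetical inputs).  To run the program for your
   own design, replace the two cards blocks above.  For example, for
   alpha=.05, power=.90, a one-sided test, and a three-category ordinal
   outcome with control probabilities (.2, .3, .5) and experimental
   probabilities (.4, .3, .3), the input steps would read:

       data x;input alpha power side;t=1;cards;
       .05 .9 1
       ;
       data y;input f g;
       cards;
       .2 .4
       .3 .3
       .5 .3
       ;

   Because each probability column sums to one, no extra row is added
   and no warning is issued.
   ********************************************************************* */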