#! /bin/csh -f

#
# groupreg-sess
#
# Original Author: Doug Greve
# CVS Revision Info:
#    $Author: nicks $
#    $Date: 2007/01/09 22:41:17 $
#    $Revision: 1.9 $
#
# Copyright (C) 2002-2007,
# The General Hospital Corporation (Boston, MA). 
# All rights reserved.
#
# Distribution, usage and copying of this software is covered under the
# terms found in the License Agreement file named 'COPYING' found in the
# FreeSurfer source code root directory, and duplicated here:
# https://surfer.nmr.mgh.harvard.edu/fswiki/FreeSurferOpenSourceLicense
#
# General inquiries: freesurfer@nmr.mgh.harvard.edu
# Bug reports: analysis-bugs@nmr.mgh.harvard.edu
#


# CVS identification string; printed for -version and written to the log.
set VERSION = '$Id: groupreg-sess,v 1.9 2007/01/09 22:41:17 nicks Exp $'

# Copy of the original command line, written to the log file later.
set inputargs = ($argv)

# Values that must come from the command line.  An empty list means
# that the option has not been given yet.
set groupid   = ()
set analysis  = ()
set contrast  = ()
set xmat      = ()
set gconmat   = ()
set gconname  = ()
set spacename = ()

# Optional values and their defaults.
set spacedir  = ()
set hemi      = ()
set hemicode  = ()
set volstem   = "ces"
set nthframe  = 1
set WLS       = 0
set synth     = 0

# Script control switches.
set monly     = 0
set MLF       = ()
set nolog     = 0
set PrintHelp = 0

# Early scan of the command line, done before the session lookup and the
# full option parsing.
#   no arguments         : print the usage summary and exit
#   -help or --help      : print the usage summary and the long help text
#   -version, --version  : print the version string and exit
#   -nolog               : do not create a log file
# Each argument is compared exactly, so a value that merely contains one
# of these words (for example an analysis name) is not taken for a flag.
if($#argv == 0) goto usage_exit;
foreach arg ($argv)
  switch("$arg")
    case "-help":
    case "--help":
      set PrintHelp = 1;
      goto usage_exit;
      breaksw
    case "-version":
    case "--version":
      echo $VERSION
      exit 0;
      breaksw
    case "-nolog":
      set nolog = 1;
      breaksw
  endsw
end

# Resolve the session directories.  The whole command line is handed to
# getsesspath, an external helper whose option handling is not visible
# in this file.  The words it prints become SessList.  A failing exit
# status or an empty list is fatal.
set SessList = `getsesspath $argv`;
if($status || $#SessList == 0) then
  echo "ERROR: cannot find any sessions" 
  echo $SessList
  exit 1;
endif

# Run the option parser and then the parameter checks.  Both sections are
# further down in this file and jump back to the _return labels below.
goto parse_args;
parse_args_return:

goto check_params;
check_params_return:

# The group contrast file must hold exactly one line (a single contrast
# vector).  The lines are counted with awk so that a final line without a
# trailing newline is still counted, which wc -l would report as zero.
set nclines = `awk 'END{print NR}' $gconmat`;
if($nclines != 1) then
  echo "ERROR: the number of lines in $gconmat is $nclines."
  echo "       There can only be one line."
  exit 1;
endif

##### Set up the log file ######
# With -nolog everything that would be logged is sent to /dev/null.
# Otherwise the log is written to log/groupreg-sess.log below the current
# directory, and a log left by an earlier run is kept with an .old suffix.
if($nolog) then
  echo "No log file"
  set LF = /dev/null
else
  set logdir = `pwd`/log
  mkdir -p $logdir
  if(! -e $logdir) then
    echo "ERROR: could not create $logdir"
    exit 1
  endif
  set LF = $logdir/groupreg-sess.log
  if(-e $LF) mv $LF $LF.old
endif

echo "----------------------------------------------------------"
echo "groupreg-sess logfile is $LF"
echo "----------------------------------------------------------"

# Log header: title, version, working directory, script path, original
# arguments, host information and start time.
echo "groupreg-sess log file" >> $LF
echo $VERSION                 >> $LF
pwd                           >> $LF
echo $0                       >> $LF
echo $inputargs               >> $LF
uname -a                      >> $LF
date                          >> $LF

## Get functional subdirectory from the info file ##
# The second field of the line whose first field is fsd is taken from
# analysis.info inside the analysis directory.
set infofile = $analysis/analysis.info
if(! -e $infofile) then
  echo "ERROR: cannot find $infofile"
  exit 1;
endif
set fsd = `awk '{if($1 == "fsd") print $2}' $infofile`;
# Every path built later contains fsd, so stop here unless the info file
# yields exactly one value for it.
if($#fsd != 1) then
  echo "ERROR: cannot determine fsd from $infofile"
  exit 1;
endif

# The group directory keeps a session.info file that lists the member
# sessions below a one-line header.  If the file already exists, the
# current session list must agree with it.  Otherwise it is created.
mkdir -p $groupid
set sessinfo = $groupid/session.info
if(-e $sessinfo) then
  # Number of recorded sessions is the line count minus the header line.
  @ n1 = `cat $sessinfo | wc -l`;
  @ n1 = $n1  - 1;
  if($n1 != $#SessList) then
    echo "ERROR: Current Session List is inconsistent with previous group average" |& tee -a $LF
    exit 1;
  endif
  foreach sess ($SessList)
    set sessid = `basename $sess`
    # Fixed-string, whole-line match.  The session id is compared
    # literally, so dots in it do not act as regular expression wildcards.
    set n1 = `grep -F -x -e "$sessid" $sessinfo | wc -l`;
    if($n1 == 0) then
      echo "ERROR: $sessid was not in original group average" |& tee -a $LF
      exit 1;
    endif
  end
else
  echo "SessionType: GroupAverage" > $sessinfo
  foreach sess ($SessList)
    set sessid = `basename $sess`
    echo $sessid >> $sessinfo
  end
endif


# Output directory for this group contrast.
set outdir = $groupid/$fsd/$analysis/$spacedir-rfx/$gconname
mkdir -p $outdir
if(! -e $outdir) then
  echo "ERROR: could not create $outdir"
  exit 1;
endif

# Check that the input volume is there for each subj, and collect the
# per-session input stems, variance stems (WLS only) and first-level
# design matrices that are later passed to MATLAB.
set InstemList = ();
set InVarStemList = ();
set FLAXMatList = ();
foreach sess ($SessList)
  set instem = $sess/$fsd/$analysis/$spacedir/$contrast/$volstem$hemicode
  # Only the existence of the first slice header is tested.  A former
  # check on the number of frames was permanently disabled (its condition
  # was and-ed with 0) and has been dropped.
  set fname = $instem"_000.hdr";
  if(! -e $fname) then
    echo "ERROR: $fname does not exist"
    exit 1;
  endif
  set InstemList = ($InstemList $instem);

  if($WLS) then
    # Weighted least squares also needs the cesvar volume of the contrast.
    set varstem = $sess/$fsd/$analysis/$spacedir/$contrast/cesvar$hemicode
    if(! -e $varstem.bhdr) then
      echo "ERROR: cannot find $varstem.bhdr"
      exit 1;
    endif
    set InVarStemList = ($InVarStemList $varstem);
  endif

  if("$spacename" == "tal") then
    # Every session copies its register.dat to the same place, so the copy
    # from the last session is the one that remains.
    set reg = $sess/$fsd/$analysis/$spacedir/register.dat
    cp $reg $groupid/$fsd/$analysis/$spacedir-rfx
  endif

  set FLAXMat = $sess/$fsd/$analysis/X.mat
  if(! -e $FLAXMat) then
    echo "ERROR: cannot find $FLAXMat"
    exit 1;
  endif
  set FLAXMatList = ($FLAXMatList $FLAXMat);
end

# First-level contrast matrix.  It must exist in the analysis directory.
set FLAConMat = ${analysis}/${contrast}.mat
if(! -e $FLAConMat) then
  echo "ERROR: cannot find $FLAConMat"
  exit 1
endif

# Marker file named after this process id.  Any stale copy is removed
# now, and its existence is tested after MATLAB has run.
set okfile = /tmp/groupreg-sess-$$.ok
rm -f $okfile

# File that receives the MATLAB script.  With -monly the user supplied
# the name.  Otherwise a file in /tmp named after this process id is used.
if($monly == 0) set MLF = /tmp/groupreg-sess-$$.m
rm -f $MLF
echo "MLF is $MLF" |& tee -a $LF

#--------------------------------------------#
# Append the MATLAB driver script to the file named by MLF.  The text
# between the tee line and the terminator line is a here-document, so the
# shell substitutes its own variables into it before tee writes it.  The
# script only assigns parameters and then calls fast_group_glm, which is
# defined outside this file.
# NOTE(review): presumably fast_group_glm performs the regression and
# creates okfile on success.  Confirm against fast_group_glm.
tee -a $MLF > /dev/null <<EOF

InstemList = splitstring('$InstemList');
FLAXMatList = splitstring('$FLAXMatList');
FLAConMat = '$FLAConMat';
ninputs = size(InstemList,1);
nthframe = $nthframe;
outdir = '$outdir';
hemicode = '$hemicode';
okfile = '$okfile';
xmatfile = '$xmat';
gconmatfile = '$gconmat';
WLS = $WLS;
InVarStemList = splitstring('$InVarStemList');
QuitOnError = ~[$monly];
synth = $synth;
fast_group_glm;

return;

EOF
#--------------------------------------------#

# Run the generated script through MATLAB.  With -monly the script has
# only been written to the user-supplied file and nothing is run.
if(! $monly) then
  cat $MLF | matlab -display iconic |& tee -a $LF
  cp $MLF $outdir
  rm -f $MLF
  # The run counts as successful only if okfile exists afterwards.
  if(! -e $okfile) then
    echo "ERROR: during matlab execution" |& tee -a $LF
    if(! $nolog) cp $LF $outdir
    exit 1;
  endif
  # The marker has served its purpose.  Do not leave it behind in /tmp.
  rm -f $okfile
  echo "INFO: Output is saved to $outdir" |& tee -a $LF
endif

date | tee -a $LF
echo "groupreg-sess completed" | tee -a $LF
# Keep a copy of the log next to the results.  Skipped with -nolog, where
# LF is /dev/null and copying it would leave an empty file named null.
if(! $nolog) cp $LF $outdir
echo " "
echo " "

exit 0;
###############################################

############--------------##################
parse_args:
# Option parser.  Consumes argv one flag at a time and stores the values
# in the variables initialised at the top of the script.  A flag that
# needs a value jumps to arg1err when the value is missing.  Control goes
# back to the main script through parse_args_return.
set cmdline = ($argv);
while( $#argv != 0 )

  set flag = $argv[1]; shift;

  switch($flag)

    case "-a":
    case "-analysis":
      if ( $#argv == 0) goto arg1err;
      set analysis = $argv[1]; shift;
      breaksw

    case "-contrast":
    case "-c":
      if ( $#argv == 0) goto arg1err;
      set contrast = $argv[1]; shift;
      breaksw

    case "-gxf":
      if ( $#argv == 0) goto arg1err;
      set xmat = $argv[1]; shift;
      if(! -e $xmat) then
        echo "ERROR: cannot find $xmat"
        exit 1;
      endif
      breaksw

    case "-gconmat":
    case "-gcm":
      if ( $#argv == 0) goto arg1err;
      set gconmat = $argv[1]; shift;
      if(! -e $gconmat) then
        echo "ERROR: cannot find $gconmat"
        exit 1;
      endif
      breaksw

    case "-gconname":
    case "-gcn":
      if ( $#argv == 0) goto arg1err;
      set gconname = $argv[1]; shift;
      breaksw

    case "-vol":
      if ( $#argv == 0) goto arg1err;
      set volstem = $argv[1]; shift;
      breaksw

    case "-frame":
      if ( $#argv == 0) goto arg1err;
      set nthframe = $argv[1]; shift;
      breaksw

    case "-group":
      if ( $#argv == 0) goto arg1err;
      set groupid = $argv[1]; shift;
      breaksw

    case "-space":
      if ( $#argv == 0) goto arg1err;
      set spacename = $argv[1]; shift;
      breaksw

    case "-spacedir":
      if ( $#argv == 0) goto arg1err;
      set spacedir = $argv[1]; shift;
      breaksw

    case "-hemi":
      if ( $#argv == 0) goto arg1err;
      set hemi = $argv[1]; shift;
      # hemicode is the suffix appended to volume stems, for example -lh.
      set hemicode = "-$hemi";
      breaksw

    case "-monly":
      # Write the MATLAB script to the named file instead of running it.
      if ( $#argv == 0) goto arg1err;
      set MLF = $argv[1]; shift;
      set monly = 1;
      breaksw

    case "-wls":
      set WLS = 1;
      breaksw

    case "-synth":
      set synth = 1;
      breaksw

    case "-umask":
      if ( $#argv == 0) goto arg1err;
      umask $argv[1]; shift;
      breaksw

    case "-debug":
      set verbose = 1;
      set echo = 1;
      breaksw

    case "-cwd":
    case "-nolog":
      # -nolog is acted on by the early scan of the command line, and
      # -cwd is accepted without any action here.
      breaksw

    case "-g":
    case "-s":
    case "-sf":
      # Rejected on purpose.  The help text states that the sessions are
      # given in the group design matrix file instead of these flags.
      echo "ERROR: cannot use flag $flag with this program."
      exit 1;
      breaksw

    case "-df":
    case "-d":
      # Search-path options meant for getsesspath.  Skip the value, after
      # making sure there is one, so that shift cannot fail.
      if ( $#argv == 0) goto arg1err;
      shift;
      breaksw

    default:
      echo ERROR: Flag $flag unrecognized. 
      echo $cmdline
      exit 1
      breaksw
  endsw

end

goto parse_args_return;
############--------------##################

############--------------##################
check_params:
# Verify that every required option was given and fill in the defaults
# that depend on other options.  Control goes back to the main script
# through check_params_return.

  if($#analysis == 0) then
    echo "ERROR: no analysis specified "
    exit 1
  endif

  if($#contrast == 0) then
    echo "ERROR: no contrast specified "
    exit 1
  endif

  if($#gconmat == 0) then
    echo "ERROR: no group contrast mat file specified "
    exit 1
  endif

  if($#gconname == 0) then
    echo "ERROR: no group contrast name specified "
    exit 1
  endif

  if($#xmat == 0) then
    echo "ERROR: no group design matrix specified "
    exit 1
  endif

  if($#groupid == 0) then
    echo "ERROR: no group name specified "
    exit 1
  endif

  if($#spacename == 0) then
    echo "ERROR: no space name specified "
    exit 1
  endif

  # Spherical data is stored per hemisphere, so one must be selected.
  if("$spacename" == "sph" && $#hemi == 0) then
    echo "ERROR: must specify a hemi with sph space"
    exit 1
  endif

  # The directory of the resampled data defaults to the space name.
  if($#spacedir == 0) set spacedir = $spacename

goto check_params_return;
############--------------##################

############--------------##################
arg1err:
  # Reached from the option parser when a flag that takes a value is the
  # last word on the command line.  The variable flag still holds it.
  echo "ERROR: flag $flag requires one argument"
  exit 1
############--------------##################

############--------------##################
arg2err:
  # Counterpart for flags that take two values.  No goto visible in this
  # file jumps here.
  echo "ERROR: flag $flag requires two arguments"
  exit 1
############--------------##################

############--------------##################
usage_exit:
  # Short usage summary.  The text holds nothing for the shell to
  # substitute, so the here-document is printed as written.
  cat <<USAGE_TEXT
USAGE: groupreg-sess

Required:

   -group    groupid
   -analysis aname : analysis
   -contrast cname : contrast
   -gxf      gxfile  : group design matrix

   -space spacename : tal or sph
   -spacedir spacedir
   -hemi hemi : lh or rh (only with sph)

   -gconmat cmat : group contrast mat file
   -gconname name : group contrast name

Optional:

   -frame frameno : 1-based frame no of ces to use (default is 1)
   -vol  stem : use stem instead of ces
   -wls : use Weighted Least Squares

   -df srchdirfile ...
   -d  srchdir     ...

   -help
   -debug
   -umask umask   : set unix file permission mask
   -version       : print version and exit

USAGE_TEXT

  # Without -help only the summary above is shown.
  if(! $PrintHelp) exit 1;

  echo $VERSION

  # Print every line of this script that follows the help marker line.
  awk 'show {print} $1 == "BEGINHELP" {show = 1}' $0

exit 1;


#---- Everything below here is printed out as part of help -----#
BEGINHELP

SUMMARY

This program performs per-voxel regression for group and inter-group
random effects analysis.  The user constructs a design matrix and
contrast vector.  The sessions to be used in the analysis are specified
in the design matrix file (see GXFILE below) instead of using the
traditional -s and -sf FSFAST flags.

The input to the regression is derived from a contrast of the
session-level analysis resampled into a common-space, usually the
ces volume. Eg, sessid/fsd/analysis/contrast/ces.

The output will be stored in
groupid/fsd/analysis/spacedir-rfx/gconname and can be accessed with
all the tools used to access any random effects analysis.


ARGUMENTS

-group groupid

groupid is the name of the group. This dictates where the output will 
be stored. In future commands, use -s groupid to access the data.

-analysis analysis (shortcut: -a)

Specifies the session-level (ie, first-level) analysis from which
the input data will come.

-contrast cname (shortcut: -c)

Specifies the session-level (ie, first-level) contrast from which
the input data will come. NOTE: this is different than the
group contrast (see -gconname). Currently only supports contrasts
with one frame.

-gxf gxfile

This is a specially formatted file with the sessions to use in the
regression analysis as well as the design matrix. See GXFILE below.
The covariance of the design matrix must have a condition number of
less than 1000 to be considered not ill-conditioned. See 
ILL-CONDITIONED DESIGN MATRIX below.

-space spacename

Get input data from the analysis resampled into space
spacename. spacename can be tal (talairach) or sph (spherical).
If sph, then you must supply a hemisphere. See func2tal-sess
and func2sph-sess.

-spacedir spacedir

By default the spacedir is spacename. Under some circumstances the
actual name of the directory where the resampled data resides
will be different than spacename (eg, when the spherical data is
smoothed with sphsmooth-sess). 

-hemi hemisphere

Hemisphere to analyze (with -space sph only). Legal values are lh and
rh.  Only one is allowed.

-gconmat cmat

This is a text file with the group contrast vector. It should be a
single line with contrast weights. The number of weights must be equal to
the number of columns in the design matrix. Note that the group-level
contrast is different than the session-level contrast.

-gconname name

This is the name that will be assigned to the group contrast. This
determines the subdirectory where the output will be stored.  Note
that the group-level contrast is different than the session-level
contrast (specified with -contrast). This is the gconname name to be
used when accessing the data at later stages (eg, viewing).

-vol volstem

Input volume. By default, ces (the contrast effect size) from the 
session-level contrast will be used. This volume is created by
stxgrinder-sess. Other volumes that can be used are pctces (the
percent CES) and t (the t-ratio).

-frame frameno

One-based frame number of the ces volume to use. Default is 1 (ie,
the first frame). The number of frames in the ces is equal to the
number of rows in the contrast matrix.

-wls

Use Weighted Least Squares (WLS) where the weight for a subject is 
the inverse of the contrast standard deviation from the lower level
analysis (this is the cesvar volume). The weights are normalized to 
sum to 1. The weights are computed on a voxel-by-voxel basis, 
which can slow things down considerably.

GROUP DESIGN MATRIX (GXFILE)

The input sessions and design matrix are specified by the group design
matrix file (gxfile -- passed with the -gxf flag). This is a specially
formatted text file. The design matrix is a matrix with the number of
rows equal to the number of sessions, and the number of columns equal 
to the number of regressors. The regressors code such information as
the group/class membership, age, IQ, etc. 

All lines that begin with # are ignored (this allows for comments
inside the file). All blank lines are ignored. The non-blank,
non-comment lines are referred to as "data lines". With the exception
of the first data line, the first column of the data line holds the
session name. The remaining columns are the regression components for
that session (eg, group membership, age, IQ, etc).  The first data
line must be a list of labels of the regressors in the design matrix,
with the first column being arbitrary.

EXAMPLE

Group Design Matrix File (x.mat):
------cuthere---------
# my group study
xxx     subj1 subj2  age
mgh-101.1   1     0   25
mgh-101.2   1     0   25

mgh-103.1   0     1   35
mgh-103.2   0     1   35
------cuthere---------

The first line is a comment. The next line is the first data line
and so holds the label. "xxx" is just a place-holder string. It
can be anything; it just has to be there. "subj1", "subj2", and "age"
are the labels for the regressors. There are four sessions (the
blank line is ignored). The design matrix itself is 4x3.

Group Contrast Vector File (c.mat):
------cuthere---------
1 -1 0
------cuthere---------
This is a single line with 3 values, one for each regressor in 
the design matrix. This particular contrast will test the difference
between subj1 and subj2 (after accounting for the effects of age).

Command-line:

  groupreg-sess -a sm-sm5 -c allvres -df sessdir \
       -space tal -gxf x.mat -gconmat c.mat  \
       -group group-tst -gconname g.allvres

This will use the ces volume from session-level analysis sm-sm5 and
contrast allvres as resampled into talairach space. The sessions to
use are found in x.mat as explained above. The output will be stored
in group-tst/bold/sm-sm5/tal-rfx/g.allvres. The output volumes are:

  beta - regression coefficients
  beta-var - variance of the residual error of the regression
  ces - the group-level contrast effect size
  f - F-ratio of the group-level contrast
  fsig - significance of the F-ratio (ie, F-test).

To view the significance using tkmedit-sess:

  tkmedit-sess -s group-tst -a sm-sm5 -c g.allvres -map fsig\
      -space tal -isxavg random
  

ILL-CONDITIONED DESIGN MATRIX

Setting up a design matrix can be tricky. If you do not do it
correctly, it can become "ill-conditioned" or "singular". This means
that one of the regressor columns is equal to another column (which is
easy to find) or is equal to some weighted combination of other
columns (which is almost impossible to find). When the condition
number of the design matrix exceeds 1000, it is considered
ill-conditioned. If you cannot determine what is wrong with it, see a
statistician or someone else who knows about design matrices.

