#!/bin/tcsh -f
set VERSION = '$Id: mri_glmfit-sim,v 1.1.2.13 2009/01/18 01:26:30 greve Exp $';

set inputargs = ($argv);

set glmdir = ()
set nulltype = ();
set nsim = ()
set thresh = ()
set csdbase = ()
set simsign = ()
set Overwrite = 0;
set DoSim = 1;
set LF = ();
set AllDoneFile = ();
set Seed = ();
set fwhmOverride = ();
set UseUniformPDF = 0;
set UniformPDFMin = ();
set UniformPDFMax = ();
set DiagCluster = 0;

set nJobs = 1;
set DoBackground = 0;
set DoPBSubmit = 0;
set DoPoll = 0;
set tSleepSec = 10;

set tmpdir = ();
set CleanUp = 1;
set PrintHelp = 0;

if($#argv == 0) goto usage_exit;
set n = `echo $argv | grep -e --help | wc -l` 
if($n != 0) then
  set PrintHelp = 1;
  goto usage_exit;
endif
set n = `echo $argv | grep -e --version | wc -l` 
if($n != 0) then
  echo $VERSION
  exit 0;
endif

goto parse_args;
parse_args_return:

goto check_params;
check_params_return:

if(! -e $glmdir) then
  echo "ERROR: cannot find $glmdir"
  exit 1;
endif

set glmfitlog = $glmdir/mri_glmfit.log
if(! -e $glmfitlog) then
  echo "ERROR: cannot find $glmfitlog"
  exit 1;
endif

set mask = `stem2fname $glmdir/mask`
if($status) then
  echo "$mask"
  exit 1;
endif

set fwhmfile = $glmdir/fwhm.dat
if(! -e $fwhmfile) then
  echo "ERROR: cannot find $fwhm"
  exit 1;
endif
# This may be overridden
set fwhm = `cat $fwhmfile`;

set glmfitcwd = `cat $glmfitlog | awk '{if($1 == "cwd") print $2}'`
if(! -e $glmfitcwd) then
  echo "ERROR: cannot find $glmfitcwd"
  exit 1;
endif

set anattype = volume;
set subject = ();
set hemi = ();
set y = ();

# Go through the original mri_glmfit command-line
set glmfitcmd = `cat $glmfitlog | awk '{if($1 == "cmdline") print $0}'`
set gd2mtx = dods
while($#glmfitcmd)
  set flag = $glmfitcmd[1]; shift glmfitcmd;
  switch($flag)
  case "--surf"
    set subject = $glmfitcmd[1]; shift glmfitcmd;
    set hemi    = $glmfitcmd[1]; shift glmfitcmd;
    set anattype = surface;
    breaksw
  case "--fsgd"
    shift glmfitcmd;
    if($#glmfitcmd > 0) then
      if($glmfitcmd[1] == doss) set gd2mtx = doss
    breaksw
  case "--y"
    set y = $glmfitcmd[1]; shift glmfitcmd;
    if(! -e $y) then
      set y = $glmdir/`basename $y`
      if(! -e $y) then
        echo "ERROR: cannot find $y"
        exit 1;
      endif
    endif
    breaksw
  default
    breaksw 
  endsw
end

# Make sure the subject exists
if($#subject != 0) then
  if(! -e $SUBJECTS_DIR/$subject) then
    echo "ERROR: cannot find $subject in $SUBJECTS_DIR"
    exit 1;
  endif
  echo "SURFACE: $subject $hemi"
endif

# Get a list of contrasts, etc
if($#tmpdir == 0) set tmpdir = $glmdir/tmp.mri_glmfit-sim-$$
mkdir -p $tmpdir
set clist = ($glmdir/*/C.dat)
if($status) then
  echo "ERROR: cannot find any contrasts"
  exit 1;
endif
set clist2 = ();
set conlist = ();
foreach c ($clist)
  set tmp = `dirname $c`
  set conname = `basename $tmp`
  set conlist = ($conlist $conname);
  set confile = $tmpdir/$conname.mtx
  cp $c $confile
  set clist2 = ($clist2 --C $confile);

  # Make sure sig is there
  set sig = `stem2fname $glmdir/$conname/sig`
  if($status) then
    echo "$sig"
    exit 1;
  endif

  # Make sure that csd is not there
  if($DoSim) then
    ls $glmdir/csd/$csdbase.j???-$conname.csd >& /dev/null
    set csdexists = (! $status)
    if($csdexists) then
      set csdfiles = ($glmdir/csd/$csdbase.j???-$conname.csd)
      if(! $Overwrite) then
        echo "ERROR: $csdfiles already exists"
        echo "Delete it or run with --overwrite"
        exit 1;
      endif
      rm -f $csdfiles
    endif
  endif
end

set StartDate = `date`

# Create log file
if($#LF == 0) then
  set LF = $glmdir/$csdbase.mri_glmfit-sim.log
  rm -f $LF
endif
#pwd | tee -a $LF
echo $0 | tee -a $LF
echo $inputargs | tee -a $LF
echo $VERSION  | tee -a $LF
date | tee -a $LF
uname -a | tee -a $LF
echo $user | tee -a $LF
echo "setenv SUBJECTS_DIR $SUBJECTS_DIR" | tee -a $LF
echo "FREESURFER_HOME $FREESURFER_HOME" | tee -a $LF
echo "DoPoll = $DoPoll" | tee -a $LF
echo "DoPBSubmit = $DoPBSubmit" | tee -a $LF
echo "DoBackground = $DoBackground" | tee -a $LF
echo "DiagCluster = $DiagCluster" | tee -a $LF
echo "gd2mtx = $gd2mtx" | tee -a $LF
if($#Seed) echo "Seed = $Seed" | tee -a $LF
echo "fwhm = $fwhm" | tee -a $LF
if($#fwhmOverride) then 
  # Change fwhm from value in glmdir to override value
  echo "fwhmOverride = $fwhmOverride" | tee -a $LF
  set fwhm = $fwhmOverride
endif

if($DoSim) then
  # Use fsgd if there, if not use X
  set fsgd = ();
  set X = ();
  if(-e $glmdir/y.fsgd) then
    set fsgd = $glmdir/y.fsgd
  else
    set X = $glmdir/Xg.dat
  endif

  # Number of sim iterations per job
  set nSimPerJob = `echo "$nsim/$nJobs" | bc`;
  echo "nSimPerJob = $nSimPerJob"| tee -a $LF

  # ----------------- Run glmfit simulation ----------------------
  # This may take a while
  mkdir -p $glmdir/csd
  @ nthJob = 1
  while($nthJob <= $nJobs)
    set jpad = `printf j%03d $nthJob`
    echo "$nthJob/$nJobs `date`" | tee -a $LF

    set cmd = (mri_glmfit --y $y $clist2 --mask $mask)
    set cmd = ($cmd --sim $nulltype $nSimPerJob $thresh $glmdir/csd/$csdbase.$jpad)
    set cmd = ($cmd --sim-sign $simsign)
    if($#fsgd) set cmd = ($cmd --fsgd $fsgd $gd2mtx)
    if($#X)    set cmd = ($cmd --X $X)
    if($nulltype != perm) set cmd = ($cmd --fwhm $fwhm)
    if($anattype == surface) set cmd = ($cmd --surf $subject $hemi);
    if($#Seed) set cmd = ($cmd --seed $Seed);
    if($UseUniformPDF) set cmd = ($cmd --uniform $UniformPDFMin $UniformPDFMax);
    if($DiagCluster) set cmd = ($cmd --diag-cluster)
    if($DoPoll) then
      set DoneFile = $glmdir/csd/done.$csdbase.$jpad
      rm -f $DoneFile
      set cmd = ($cmd --sim-done $DoneFile)
    endif

    echo $cmd | tee -a $LF
    if(! $DoPoll) then
      # Run directly in shell
      $cmd | tee -a $LF
      if($status && ! $DiagCluster) exit 1;
      date | tee -a $LF

      # ----------------------------------------------------
      if($DiagCluster) then
        set conname = $conlist[1];
        set sig = ./$conname-sig.mgh
        set csd = $glmdir/csd/$csdbase.$jpad-$conname.csd
        set sum = $glmdir/csd/$csdbase.$jpad-$conname.diag

        if($anattype == surface) then
          set cmd = (mri_surfcluster --subject $subject --hemi $hemi)
          set c = 4;
        endif
        if($anattype == volume) then
          set cmd = (mri_volcluster)
          set c = 3;
        endif
        set cmd = ($cmd --in $sig --mask $mask \
         --sum $sum --thmin $thresh --sign $simsign)
        echo $cmd | tee -a $LF
        $cmd | tee -a $LF
        if($status) exit 1;

        set maxclustB = `grep -v \# $csd | awk '{print $3}' | sort -nr | head -n 1`;
        set maxclustA = `grep -v \# $sum | awk -v c=$c '{print $c}' | sort -nr | head -n 1`;
        echo "MaxClusterSizeA $maxclustA"| tee -a $LF
        echo "MaxClusterSizeB $maxclustB"| tee -a $LF
        exit 1;
      endif

      # ----------------------------------------------------
    else
      if($DoBackground) then
        # Run in background
        $cmd >>& $LF &
      endif
      if($DoPBSubmit) then
        # pbsubmit
        echo "PBSUBMIT ---------------------------------" | tee -a $LF
        pbsubmit -l nodes=1:opteron -c "$cmd >> $LF" | tee -a $LF
        sleep 10;
        echo "" | tee -a $LF
      endif
    endif
    @ nthJob = $nthJob + 1
  end

  if($DoPoll) then
    # Possible bug: if job above dies without creating done file
    echo "Polling" | tee -a $LF
    @ nthPoll = 0;
    set Done = 0;
    while(! $Done)
      @ nthPoll = $nthPoll + 1;
      set Done = 1;
      @ nthJob = 1
      while($nthJob <= $nJobs)
        set jpad = `printf j%03d $nthJob`
        set DoneFile = $glmdir/csd/done.$csdbase.$jpad
        if(! -e $DoneFile) then
          echo "Poll $nthPoll job $nthJob `date`" | tee -a $LF
          set Done = 0;
          break;
        endif
      @ nthJob = $nthJob + 1
      end
      sleep $tSleepSec 
    end
  endif

endif #DoSim

if($DoPBSubmit) then
  # pbsubmit the surfcluster part
  set cmd = (mri_glmfit-sim --glmdir $glmdir  --tmp $tmpdir)
  set cmd = ($cmd --sim-sign $simsign --no-sim $csdbase)
  set cmd = ($cmd --log $LF)
  echo "------------------------------------------------" | tee -a $LF
  echo $cmd  | tee -a $LF
  pbsubmit -l nodes=1:opteron -c "$cmd"
  sleep 5;
  echo ""
  echo "Just submitted this as a job, and exiting now."
  echo ""
  # Could wait until it is done by polling for AllDoneFile
  exit 0;
endif

# -------------- now run clustering -------------------- #
foreach conname ($conlist)

  ls $glmdir/csd/$csdbase.j???-$conname.csd >& /dev/null
  if($status) then
    echo "ERROR: cannot find any csd files"| tee -a $LF
    exit 1; 
  endif

  set csdfiles = ($glmdir/csd/$csdbase.j???-$conname.csd)
  set csdlist = ();
  foreach csd ($csdfiles)
    set csdlist = ($csdlist --csd $csd);
  end

  set sig = `stem2fname $glmdir/$conname/sig`
  set vwsig = $glmdir/$conname/$csdbase.sig.voxel.mgh
  set cwsig = $glmdir/$conname/$csdbase.sig.cluster.mgh
  set ocn   = $glmdir/$conname/$csdbase.sig.ocn.mgh
  set annot = $glmdir/$conname/$csdbase.sig.ocn.annot
  set sum   = $glmdir/$conname/$csdbase.sig.cluster.summary

  if($anattype == surface) then
    set cmd = (mri_surfcluster --in $sig $csdlist --mask $mask \
      --cwsig $cwsig --vwsig $vwsig --sum $sum --ocn $ocn \
      --oannot $annot --annot aparc)
    echo $cmd | tee -a $LF
    $cmd | tee -a $LF
    if($status) exit 1;
  endif

  if($anattype == volume) then
    set cmd = (mri_volcluster --in $sig $csdlist --mask $mask \
      --cwsig $cwsig --vwsig $vwsig --sum $sum --ocn $ocn)
    echo $cmd | tee -a $LF
    $cmd | tee -a $LF
    if($status) exit 1;
  endif

  # For some reason cwsig has 4 frames, just take 1st
  set cmd = (mri_convert $cwsig $cwsig --frame 0)
  echo $cmd | tee -a $LF
  $cmd  | tee -a $LF
  if($status) exit 1;
  
end

if($CleanUp) rm -r $tmpdir

if($#AllDoneFile) touch $AllDoneFile

echo $StartDate
date
echo "mri_glmfit-sim done"

exit 0
###############################################

############--------------##################
parse_args:
set cmdline = ($argv);
while( $#argv != 0 )

  set flag = $argv[1]; shift;
  
  switch($flag)

    case "--glmdir":
      if( $#argv < 0) goto arg1err;
      set glmdir = $argv[1]; shift;
      breaksw

    case "--sim":
      if( $#argv < 4) goto arg4err;
      set nulltype = $argv[1]; shift;
      set nsim = $argv[1]; shift;
      set thresh = $argv[1]; shift;
      set csdbase = $argv[1]; shift;
      breaksw

    case "--sim-sign":
      if( $#argv < 1) goto arg1err;
      set simsign = $argv[1]; shift;
      breaksw

    case "--log":
      if( $#argv < 1) goto arg1err;
      set LF = $argv[1]; shift;
      breaksw

    case "--overwrite":
      set Overwrite = 1;
      breaksw

    case "--pbsubmit":
      if($#argv < 1) goto arg1err;
      set nJobs = $argv[1]; shift;
      set DoPBSubmit = 1;
      set DoPoll = 1;
      set DoBackground = 0;
      if(`hostname` != seychelles) then
        echo "ERROR: must be on seychelles to pbsubmit"
        exit 1;
      endif
      breaksw

    case "--bg":
      if($#argv < 1) goto arg1err;
      set nJobs = $argv[1]; shift;
      set DoBackground = 1;
      set DoPoll = 1;
      set DoPBSubmit = 0;
      breaksw

    case "--no-bg":
      set DoBackground = 0;
      set DoPoll = 0;
      breaksw

    case "--sleep":
      if($#argv < 1) goto arg1err;
      set tSleepSec = $argv[1]; shift;
      breaksw

    case "--seed":
      if($#argv < 1) goto arg1err;
      set Seed = $argv[1]; shift;
      breaksw

    case "--fwhm-override":
      if($#argv < 1) goto arg1err;
      set fwhmOverride = $argv[1]; shift;
      breaksw

    case "--tmp":
      if($#argv < 1) goto arg1err;
      set tmpdir = $argv[1]; shift;
      breaksw

    case "--uniform":
      if($#argv < 2) goto arg2err;
      set UniformPDFMin = $argv[1]; shift;
      set UniformPDFMax = $argv[1]; shift;
      set UseUniformPDF = 1;
      breaksw

    case "--no-sim":
      if($#argv < 1) goto arg1err;
      set csdbase = $argv[1]; shift;
      set DoSim = 0;
      breaksw

    case "--diag-cluster":
      set DiagCluster = 1;
      breaksw

    case "--debug":
      set verbose = 1;
      set echo = 1;
      breaksw

    default:
      echo ERROR: Flag $flag unrecognized. 
      echo $cmdline
      exit 1
      breaksw
  endsw

end

goto parse_args_return;
############--------------##################

############--------------##################
check_params:
if($#glmdir == 0) then
  echo "ERROR: must spec --glmdir"
  exit 1;
endif
if($DoSim) then
  if($#nulltype == 0) then
    echo "ERROR: must spec --sim"
    exit 1;
  endif
  if($#simsign == 0) then
    echo "ERROR: must spec --sim-sign"
    exit 1;
  endif
endif

if($DiagCluster && $DoBackground) then
  echo "ERROR: cannot background with --diag-cluster"
  exit 1;
endif
if($DiagCluster && $DoPBSubmit) then
  echo "ERROR: cannot pbsubmit with --diag-cluster"
  exit 1;
endif
if($DiagCluster && $DoSim == 0) then
  echo "ERROR: must simulate with --diag-cluster"
  exit 1;
endif

goto check_params_return;
############--------------##################

############--------------##################
arg1err:
  echo "ERROR: flag $flag requires one argument"
  exit 1
############--------------##################

############--------------##################
arg2err:
  echo "ERROR: flag $flag requires two arguments"
  exit 1
############--------------##################

############--------------##################
arg4err:
  echo "ERROR: flag $flag requires four arguments"
  exit 1
############--------------##################

############--------------##################
usage_exit:
  echo ""
  echo "mri_glmfit-sim"
  echo ""
  echo " --glmdir glmdir"
  echo " --sim nulltype nsim threshhold csdbase"
  echo " --sim-sign sign : <abs>,pos,neg"
  echo ""
  echo " --overwrite : delete previous CSDs"
  echo ""
  echo " --bg njobs : divide sim into njobs and put in background"
  echo " --sleep tSleepSec : number of seconds to sleep between background polls "
  echo ""
  echo " --log logfile : default is csdbase.mri_glmfit-sim.log"
  echo ""
  echo " --no-sim csdbase : do not simulate, only run cluster"
  echo " --seed seed : set simluation seed"
  echo " --fwhm-override fwhm : override fwhm in glmdir":
  echo " --uniform min max : use uniform PDF instead of gaussian":
  echo ""
  echo " --help"
  echo ""

  if(! $PrintHelp) exit 1;

  echo $VERSION

  cat $0 | awk 'BEGIN{prt=0}{if(prt) print $0; if($1 == "BEGINHELP") prt = 1 }'

exit 1;

#---- Everything below here is printed out as part of help -----#
BEGINHELP

This program is a script to run GLM-based simulations for corrections
for multiple comparisons on the volume or the surface. It is a
front-end for both the mri_glmfit and mri_surfcluster programs.

To use this program:

1. Run mri_glmfit on your data, giving it the glm directory
   (--glmdir), and all the other arguments.

2. Choose the type of simulation (nulltype) you want to run. The
   choices are:

mc-z - synthesized, smoothed z-field (fast)
mc-full - fully synthesized, smoothed gaussian field (slowest)
perm - permutation on original data (probably fasted one)

3. Choose the voxel/vertex-wise threshhold used to define clusters. This
   will be -log10(p), where p is the p-value. Eg, to use all voxels/vertices
   with p < .01, then set the threshhold = 2.

4. Choose the number of iterations (nsim). This is usually 10,000
   (which can take quite a while).

5. Select the CSD base name (csdbase). CSD stands for "Cluster
   Simulation Data". There will be a text file for each contrast with
   the information about the null distribution. This file will be
   called csdbase-contrast.csd, so the csdbase should be something
   descriptive. Eg, if you have mc-z, with threshold=2, and sign=pos,
   then setting csdbase to mc-z.pos.2 would be appropriate. If the 
   CSD file exists, then you must delete it or run with --overwrite.

6. Select the threshold sign (--sim-sign). This is the sign of the
   threshold used to create the clusters for cluster-wise correction
   of multiple comparisons. Options are abs (unsigned), pos
   (positive), and neg (negative).

Now run the command:

mri_glmfit-sim --glmdir glmdir \
  --sim nulltype nsim threshold csdbase \
  --sim-sign sign

Example,

mri_glmfit-sim --glmdir lh.thickness.sm05 \
  --sim mc-z 10000 3 mc-z.pos.3 --sim-sign pos

This will run mri_glmfit in simulation mode. It will find all the
contrasts that you ran in the original invocation of mri_glmfit and
create CSD files for them. It will use the mask and, for mc
simulations, the FWHM found in the glmdir.

It will then run mri_surfcluster or mri_volcluster for
each contrast, using the sig volume as input and creating the
following files in each contrast directory:

csdbase.sig.voxel.mgh 
csdbase.sig.cluster.mgh 
csdbase.sig.cluster.summary
csdbase.sig.ocn.mgh
csdbase.sig.ocn.annot (for surfaces only)

csdbase.sig.voxel.mgh - the sig volume corrected for multiple
comparisons on a voxel-wise basis. The threshhold and sign are
irrelevant. The value at each voxel is the corrected -log10(p-value)
for that voxel.

csdbase.sig.cluster.mgh - the sig volume corrected for multiple
comparisons on a cluster-wise basis. The value at each voxel
is the -log10(p), where p is the pvalue of the cluster at 
that voxel. If that voxel does not belong to a cluster, its
value will be 0. 

csdbase.sig.cluster.summary - this is the cluster summary table 
(a simple text file).

csdbase.sig.ocn.mgh - output cluster number volume. The value of the
voxel is the integer cluster number that that voxel belongs to. This
can be used like a segmentation (eg, to load into tkmedit or
use for running mri_segstats).

csdbase.sig.ocn.annot (for surfaces only) - this is an annotation
with the annotation name being the cluster number. This can be
used like any other annotation (eg, to load into tksurfer or 
as input for mri_segstats or mris_anatomical_stats).

OTHER OPTIONS

--overwrite

Overwrite existing CSD files when performing simulation. Default 
is to not overwrite so that you do not clobber CSD files that you
spent a weekend creating.

--bg njobs

CAREFUL!!!! WARNING!!! Divide the number of simulation interations
into njobs background jobs. If the simulation jobs die uncleanly for
some reason, this will never quit because it looks for a file created
by the simulation job before continuing. If this file is not created,
then it will keep polling for this file forever. ALSO: if you are not
running on a computer with multiple nodes, then this will not make it
run any faster (in fact, it will run slower).

--no-sim csdbase

Run this if you have already run the stimulation but just want to 
run the correction for multiple comparisons again. This is mostly
used for debugging.

--seed seed

Set the seed for simulation random number generator. This is mostly
used for debugging.

--fwhm-override fwhm 

Override fwhm in glmdir with given value. This is mostly
used for debugging.

