static char rcsid[] = "$Id: 12eb519574f5bdc943d9f68ece102eeb6f5134dc $";
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef HAVE_MEMCPY
#define memcpy(d,s,n) bcopy((s),(d),(n))
#endif
#ifndef HAVE_MEMMOVE
#define memmove(d,s,n) bcopy((s),(d),(n))
#endif

#include "stage1hr.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>		/* For memset() */
#include <math.h>
#include <ctype.h>		/* for tolower() */
#include "assert.h"
#include "mem.h"
#include "types.h"		/* Needed for HAVE_64_BIT */
#include "univcoord.h"

#include "reader.h"
#include "oligo.h"

#include "list.h"
#include "complement.h"
#include "compress.h"
#include "extension-search.h"	/* For Elt_gc */
#include "tr-extension-search.h"	/* For Tr_elt_gc */

#include "path.h"


/* Note FORMULA: formulas for querypos <-> diagonal (diagterm in call to Indexdb_read) are:

plus: diagonal = position + querylength - querypos
minus: diagonal = position + querypos + index1part

For minus, the index1part is needed in call to Indexdb_read because
position is stored at beginning of plus oligomer, which corresponds to
end of minus oligomer.  As a result, we have the following formulas:

high genomic position = diagonal (corresponds to querypos =
querylength for plus, and querypos = 0 for minus)

low genomic position = diagonal - querylength (corresponds to querypos
= 0 for plus, and querypos = querylength for minus)

Holds when we use Reader_T to read from 5' end of forward query and 3'
end of revcomp query simultaneously.  If we create a queryrc sequence,
then we can use just the plus formula, and convert the query
coordinates later.

*/


/* Affects only transcriptome-guided genomic alignment on paired-end
   reads.  If one end stops at transcriptome results and the other end
   requires genomic results, continues the first end to find genomic
   results.  Eliminates the greedy and potentially false alignment of
   one end to a transcript.  However, if transcriptome procedures miss
   alignments, then this adds time and memory significantly. */
/* #define AVOID_UNEVEN_LEVELS 1 */

/* Compile-time switches.  NOTE(review): neither switch is tested in
   the portion of this file under review; confirm that they are still
   consumed somewhere before relying on them. */
#define NO_EXTENSIONS_BEFORE_ZERO 1

#define ALLOW_MIDDLE_ALIGNMENTS 1

/* MAX_INDEXSIZE is defined only when EXTRACT_GENOMICSEG is enabled,
   which it currently is not (the define below is commented out). */
/* #define EXTRACT_GENOMICSEG 1 */
#ifdef EXTRACT_GENOMICSEG
#define MAX_INDEXSIZE 8
#endif


/* MAX_NALIGNMENTS of 2 vs 1 gets 1600 improvements in 275,000 reads */
/* MAX_NALIGNMENTS of 3 vs 2 gets 96 improvements in 275,000 reads */
#define MAX_NALIGNMENTS 3

#define MAX_ALLOCATION 200

#define PAIRMAX_ADDITIONAL 10000 /* Allows for finding of unpaired GMAP alignments beyond pairmax */

/* static int kmer_search_sizelimit = 100; */
/* static int stage1hr_sizelimit = 3000; */
/* static int extension_search_sizelimit = 3000; */

/* File-scope configuration.  Every variable below is assigned by
   Stage1hr_setup() and only read by the other procedures in this
   file. */

/* Index databases.  Stage1_init looks up plus oligos in indexdb_fwd
   and minus oligos in indexdb_rev; Stage1_fill_all_positions swaps the
   two when genestrand == +2.  indexdb_tr is stored by Stage1hr_setup
   but not otherwise referenced in the portion of the file under
   review. */
static Indexdb_T indexdb_fwd;
static Indexdb_T indexdb_rev;
static Indexdb_T indexdb_tr;

/* index1part is the oligo size handed to Reader_new.  index1interval
   determines how many positions Stage1_init fills at each end of the
   query, and the overhang (index1interval - 1) allocated in
   Stage1_new. */
static int index1part;
static int index1part_tr;
static int index1interval;

/* leftreadshift is 64 (or 32 without HAVE_64_BIT) minus 2*index1part.
   oligobase_mask has its low 2*index1part bits set. */
static int leftreadshift;
static Oligospace_T oligobase_mask; /* same as kmer_mask */

/* Other distances */
/* Stage1_new sizes streamspace_alloc as twice this value */
static Chrpos_T shortsplicedist_novelend;

/* When NULL, Stage1_new allocates only the genomic (plus/minus) views
   of the per-querypos arrays and leaves the tr_* views NULL */
static Transcriptome_T transcriptome;


/* Two-bit codes, one per nucleotide letter.  NOTE(review): not
   referenced in the portion of this file under review; presumably
   they mirror the 2-bits-per-base oligo packing -- confirm. */
#define A_CHAR 0x0
#define C_CHAR 0x1
#define G_CHAR 0x2
#define T_CHAR 0x3


/* Originally allowed only 1, to print only unique translocations.
   But need to allow enough to avoid missing some translocations. */
/* For transcript splicing, need to increase MAXCHIMERAPATHS */
/* #define MAXCHIMERAPATHS 100 */
#define MAXCHIMERAPATHS 10000

#define NREQUIRED_FAST 2	/* For candidate generation using
				   multimiss.  A value of 2 implies
				   specificity of a 24-mer, which
				   should be low for a human-sized
				   genome */

/* NOTE(review): Stage1_init indexes the *_end5 / *_end3 arrays with
   mod < index1interval, and Stage1_new clears them for
   mod < 2*index1interval.  Presumably index1interval may not exceed
   this value -- confirm against the array sizes declared in
   stage1hr.h. */
#define MAX_INDEX1INTERVAL 3



/* Debug tracing macros.  Each debugN(x) expands to x when the
   corresponding DEBUGN symbol is defined at compile time, and to
   nothing otherwise.  The comment above each macro names the area of
   code it traces. */

/* Overall flow */
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif

/* Stage1_list_paths */
#ifdef DEBUG1
#define debug1(x) x
#else
#define debug1(x)
#endif

/* Stage1_init */
#ifdef DEBUG2
#define debug2(x) x
#else
#define debug2(x)
#endif

/* Filling oligos */
#ifdef DEBUG8
#define debug8(x) x
#else
#define debug8(x)
#endif

/* consolidate_paired_results and choose_among_paired */
#ifdef DEBUG16
#define debug16(x) x
#else
#define debug16(x)
#endif


#define T Stage1_T
T
Stage1_new (int querylength) {
  T new;

  /* Previously required querylength >= index1part, but that doesn't
     work with chopping for poly-A/T */
  new = (T) MALLOC(sizeof(*new));

  int overhang = index1interval-1;
  int mod;

  new->unsolved_sense_paths_gplus = NULL;
  new->unsolved_sense_paths_gminus = NULL;
  new->unsolved_antisense_paths_gplus = NULL;
  new->unsolved_antisense_paths_gminus = NULL;

  new->unextended_sense_paths_gplus = NULL;
  new->unextended_sense_paths_gminus = NULL;
  new->unextended_antisense_paths_gplus = NULL;
  new->unextended_antisense_paths_gminus = NULL;

  new->extended_sense_paths_gplus = NULL;
  new->extended_sense_paths_gminus = NULL;
  new->extended_antisense_paths_gplus = NULL;
  new->extended_antisense_paths_gminus = NULL;

  new->sense_paths_gplus = NULL;
  new->sense_paths_gminus = NULL;
  new->antisense_paths_gplus = NULL;
  new->antisense_paths_gminus = NULL;


  new->validp = (bool *) CALLOC(querylength+overhang,sizeof(bool));

  new->forward_oligos = (Oligospace_T *) MALLOC((querylength+overhang)*sizeof(Oligospace_T));
  new->revcomp_oligos = (Oligospace_T *) MALLOC((querylength+overhang)*sizeof(Oligospace_T));

  if (transcriptome == NULL) {
    new->retrievedp_allocated = (bool *) CALLOC(2 * (querylength+overhang),sizeof(bool));
    new->plus_retrievedp = &(new->retrievedp_allocated[overhang]);
    new->minus_retrievedp = &(new->retrievedp_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_retrievedp = new->tr_minus_retrievedp = (bool *) NULL;
  } else {
    new->retrievedp_allocated = (bool *) CALLOC(4 * (querylength+overhang),sizeof(bool));
    new->plus_retrievedp = &(new->retrievedp_allocated[overhang]);
    new->minus_retrievedp = &(new->retrievedp_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_retrievedp = &(new->retrievedp_allocated[2*(querylength+overhang)+overhang]);
    new->tr_minus_retrievedp = &(new->retrievedp_allocated[3*(querylength+overhang)+overhang]);
  }
    

#ifdef LARGE_GENOMES
  new->positions_high_allocated = (unsigned char **) CALLOC(2 * (querylength+overhang),sizeof(unsigned char *));
  new->plus_positions_high = &(new->positions_high_allocated[overhang]);
  new->minus_positions_high = &(new->positions_high_allocated[(querylength+overhang)+overhang]);
#endif

  if (transcriptome == NULL) {
    new->positions_allocated = (UINT4 **) CALLOC(2 * (querylength+overhang),sizeof(UINT4 *));
    new->plus_positions = &(new->positions_allocated[overhang]);
    new->minus_positions = &(new->positions_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_positions = new->tr_minus_positions = (UINT4 **) NULL;
  } else {
    new->positions_allocated = (UINT4 **) CALLOC(4 * (querylength+overhang),sizeof(UINT4 *));
    new->plus_positions = &(new->positions_allocated[overhang]);
    new->minus_positions = &(new->positions_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_positions = &(new->positions_allocated[2*(querylength+overhang)+overhang]);
    new->tr_minus_positions = &(new->positions_allocated[3*(querylength+overhang)+overhang]);
  }

  if (transcriptome == NULL) {
    new->npositions_allocated = (int *) CALLOC(2 * (querylength+overhang),sizeof(int));
    new->plus_npositions = &(new->npositions_allocated[overhang]);
    new->minus_npositions = &(new->npositions_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_npositions = new->tr_minus_npositions = (int *) NULL;
  } else {
    new->npositions_allocated = (int *) CALLOC(4 * (querylength+overhang),sizeof(int));
    new->plus_npositions = &(new->npositions_allocated[overhang]);
    new->minus_npositions = &(new->npositions_allocated[(querylength+overhang)+overhang]);
    new->tr_plus_npositions = &(new->npositions_allocated[2*(querylength+overhang)+overhang]);
    new->tr_minus_npositions = &(new->npositions_allocated[3*(querylength+overhang)+overhang]);
  }

  /* Need to allocate (max_mismatches+MISMATCH_EXTRA), where
     max_mismatches is provided to Genomebits_mismatches_left or
     Genome_mismatches_right */
  /* new->mismatch_positions_alloc = (int *) MALLOC((querylength+MISMATCH_EXTR)*sizeof(int)); */
  new->positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
  new->indelinfo = Indelinfo_new(querylength);
  new->spliceinfo = Spliceinfo_new(querylength);
  new->mergeinfo_tr = Mergeinfo_uint4_new(querylength);
#ifdef LARGE_GENOMES
  new->mergeinfo = Mergeinfo_uint8_new(querylength);
#else
  new->mergeinfo = Mergeinfo_uint4_new(querylength);
#endif

  /* Memory allocated for Segment_identify in segment-search.c, and
     Merge_diagonals in kmer-search.c (which needs four sets of
     arrays) */
#ifdef LARGE_GENOMES
  new->stream_high_alloc = (unsigned char **) MALLOC(4*querylength*sizeof(unsigned char *));
  new->gplus_stream_high_array_5 = &(new->stream_high_alloc[0]);
  new->gminus_stream_high_array_5 = &(new->stream_high_alloc[querylength]);
  new->gplus_stream_high_array_3 = &(new->stream_high_alloc[2*querylength]);
  new->gminus_stream_high_array_3 = &(new->stream_high_alloc[3*querylength]);

  new->stream_low_alloc = (UINT4 **) MALLOC(4*querylength*sizeof(UINT4 *));
  new->gplus_stream_low_array_5 = &(new->stream_low_alloc[0]);
  new->gminus_stream_low_array_5 = &(new->stream_low_alloc[querylength]);
  new->gplus_stream_low_array_3 = &(new->stream_low_alloc[2*querylength]);
  new->gminus_stream_low_array_3 = &(new->stream_low_alloc[3*querylength]);
#endif

  /* 2 localdb regions possible if shortsplicedist_novelend < 65536 */
  new->streamspace_max_alloc = 2 * shortsplicedist_novelend;
  MALLOC_ALIGN(new->streamspace_alloc,2* shortsplicedist_novelend*sizeof(Univcoord_T));
  new->streamptr_alloc = (Univcoord_T **) MALLOC(4*querylength*sizeof(Univcoord_T *));
  new->gplus_stream_array_5 = &(new->streamptr_alloc[0]);
  new->gminus_stream_array_5 = &(new->streamptr_alloc[querylength]);
  new->gplus_stream_array_3 = &(new->streamptr_alloc[2*querylength]);
  new->gminus_stream_array_3 = &(new->streamptr_alloc[3*querylength]);

#ifdef LARGE_GENOMES
  new->tplus_stream_array = new->gplus_stream_low_array_5;
  new->tminus_stream_array = new->gminus_stream_low_array_5;
#else
  new->tplus_stream_array = new->gplus_stream_array_5;
  new->tminus_stream_array = new->gminus_stream_array_5;
#endif

  new->streamsize_alloc = (int *) MALLOC(4*querylength*sizeof(int));
  new->tplus_streamsize_array = new->gplus_streamsize_array_5 = &(new->streamsize_alloc[0]);
  new->tminus_streamsize_array = new->gminus_streamsize_array_5 = &(new->streamsize_alloc[querylength]);
  new->gplus_streamsize_array_3 = &(new->streamsize_alloc[2*querylength]);
  new->gminus_streamsize_array_3 = &(new->streamsize_alloc[3*querylength]);

  new->querypos_diagterm_alloc = (int *) MALLOC(4*querylength*sizeof(int));
  new->tplus_diagterm_array = new->gplus_diagterm_array_5 = &(new->querypos_diagterm_alloc[0]);
  new->tminus_diagterm_array = new->gminus_diagterm_array_5 = &(new->querypos_diagterm_alloc[querylength]);
  new->gplus_diagterm_array_3 = &(new->querypos_diagterm_alloc[2*querylength]);
  new->gminus_diagterm_array_3 = &(new->querypos_diagterm_alloc[3*querylength]);


  for (mod = 0; mod < 2*index1interval; mod++) {
#ifdef LARGE_GENOMES
    new->plus_positions_high_end5[mod] = (unsigned char *) NULL;
    new->minus_positions_high_end5[mod] = (unsigned char *) NULL;
    new->plus_positions_high_end3[mod] = (unsigned char *) NULL;
    new->minus_positions_high_end3[mod] = (unsigned char *) NULL;
#endif
    new->plus_positions_end5[mod] = (UINT4 *) NULL;
    new->minus_positions_end5[mod] = (UINT4 *) NULL;
    new->plus_positions_end3[mod] = (UINT4 *) NULL;
    new->minus_positions_end3[mod] = (UINT4 *) NULL;
  }

  /* Uses Listpool_T procedures */
  new->queryfwd_plus_set = (List_T) NULL;
  new->queryfwd_minus_set = (List_T) NULL;
  new->queryrev_plus_set = (List_T) NULL;
  new->queryrev_minus_set = (List_T) NULL;

  new->tr_queryfwd_plus_set = (List_T) NULL;
  new->tr_queryfwd_minus_set = (List_T) NULL;
  new->tr_queryrev_plus_set = (List_T) NULL;
  new->tr_queryrev_minus_set = (List_T) NULL;

  return new;
}


#ifdef DEBUG1
/* Debugging aid, compiled only under DEBUG1.  For every query position
   from 0 through querylength - index1part, prints the oligo and the
   list of index positions for each strand that has been marked as
   retrieved. */
void
Stage1_dump (T this, int querylength) {
  const int last_querypos = querylength - index1part;
  int querypos = 0;
  int npositions, k;

  printf("Stage1_dump\n");
  while (querypos <= last_querypos) {
    if (this->plus_retrievedp[querypos] == true) {
      printf("plus  %d %s (%d):",querypos,Oligo_one_nt(this->forward_oligos[querypos],index1part),
             this->plus_npositions[querypos]);
      npositions = this->plus_npositions[querypos];
      for (k = 0; k < npositions; k++) {
        printf(" %u",this->plus_positions[querypos][k]);
      }
      printf("\n");
    }

    if (this->minus_retrievedp[querypos] == true) {
      printf("minus %d %s (%d):",querypos,Oligo_one_nt(this->revcomp_oligos[querypos],index1part),
             this->minus_npositions[querypos]);
      npositions = this->minus_npositions[querypos];
      for (k = 0; k < npositions; k++) {
        printf(" %u",this->minus_positions[querypos][k]);
      }
      printf("\n");
    }

    querypos++;
  }
  printf("\n");

  return;
}
#endif


/* Fills in ends */
/* Looks up index positions for the oligos at both ends of the query
   and records each result twice: in the *_end5 / *_end3 arrays
   (indexed by mod) and in the per-querypos arrays (positions,
   npositions, retrievedp), so later procedures do not repeat the
   lookup.

   this:         Stage1_T to fill (presumably created by Stage1_new)
   queryuc_ptr:  query sequence, handed to Reader_new
   querylength:  length of the query; asserted to be at least
                 index1part + index1interval - 1
   genestrand:   passed through to Oligo_next_5 / Oligo_next_3

   For the 5' end, mod counts query positions upward from 0.  For the
   3' end, mod counts downward from query_lastpos = querylength -
   index1part, so mod corresponds to querypos = query_lastpos - mod.
   A mod for which no oligo is obtained gets NULL positions and a
   count of 0, but is still marked as retrieved.

   Lookups in this procedure always use indexdb_fwd for plus and
   indexdb_rev for minus; genestrand does not swap them here. */
void
Stage1_init (T this, char *queryuc_ptr, int querylength, int genestrand) {
  Reader_T reader;
  Oligostate_T last_state;
  Oligospace_T forward, revcomp, forward_oligo, revcomp_oligo;
  int querypos, query_lastpos;
  int mod;


  assert(querylength >= index1part + index1interval - 1);

  debug2(printf("%s\n",queryuc_ptr));
  reader = Reader_new(queryuc_ptr,/*querystart*/0,/*queryend*/querylength,/*oligosize*/index1part);
  last_state = INIT;
  forward = revcomp = 0;
  mod = 0;

  /* 5' end: read oligos until index1interval mods are filled or the
     reader reports DONE */
  while (mod < index1interval &&
	 (last_state = Oligo_next_5(last_state,&querypos,&forward,&revcomp,reader,genestrand)) != DONE) {
    /* The reader returned a querypos beyond mod, so the positions in
       between yielded no oligo.  Record them as empty. */
    while (mod < index1interval && mod < querypos) {
      debug2(printf("Skipping positions_end5 %d, because querypos is not the expected one\n",mod));
#ifdef LARGE_GENOMES
      this->plus_positions_high_end5[mod] = this->plus_positions_high[mod] = (unsigned char *) NULL;
      this->minus_positions_high_end5[mod] = this->minus_positions_high[mod] = (unsigned char *) NULL;
#endif
      /* this->validp[mod] = false; -- default value */
      this->plus_positions_end5[mod] = this->plus_positions[mod] = (UINT4 *) NULL;
      this->minus_positions_end5[mod] = this->minus_positions[mod] = (UINT4 *) NULL;
      this->plus_npositions_end5[mod] = this->plus_npositions[mod] = 0;
      this->minus_npositions_end5[mod] = this->minus_npositions[mod] = 0;
      this->plus_retrievedp[mod] = true;
      this->minus_retrievedp[mod] = true;
      mod++;
    }

    if (mod < index1interval) {
      this->validp[querypos] = true;

      /* Plus strand: diagterm follows the plus formula in Note FORMULA */
      forward_oligo = this->forward_oligos[querypos] = forward & oligobase_mask;
      this->plus_diagterms_end5[mod] = querylength - querypos;
#ifdef LARGE_GENOMES      
      this->plus_npositions_end5[mod] = this->plus_npositions[querypos] =
	Indexdb_largeptr(&(this->plus_positions_high_end5[mod]),&(this->plus_positions_end5[mod]),
			 /*plus_indexdb*/indexdb_fwd,forward_oligo);
      this->plus_positions_high[querypos] = this->plus_positions_high_end5[mod];
#else
      this->plus_npositions_end5[mod] = this->plus_npositions[querypos] =
	Indexdb_ptr(&(this->plus_positions_end5[mod]),/*plus_indexdb*/indexdb_fwd,forward_oligo);
#endif
      this->plus_positions[querypos] = this->plus_positions_end5[mod];
      this->plus_retrievedp[querypos] = true;
      debug2(printf("(1) plus_npositions_end5[%d] = %d, oligo %016lX\n",
		    mod,this->plus_npositions_end5[mod],forward_oligo));
      
      /* Minus strand: diagterm follows the minus formula in Note FORMULA */
      revcomp_oligo = this->revcomp_oligos[querypos] = (revcomp >> leftreadshift) & oligobase_mask;
      this->minus_diagterms_end5[mod] = querypos + index1part;
#ifdef LARGE_GENOMES
      this->minus_npositions_end5[mod] = this->minus_npositions[querypos] =
	Indexdb_largeptr(&(this->minus_positions_high_end5[mod]),&(this->minus_positions_end5[mod]),
			 /*minus_indexdb*/indexdb_rev,revcomp_oligo);
      this->minus_positions_high[querypos] = this->minus_positions_high_end5[mod];
#else
      this->minus_npositions_end5[mod] = this->minus_npositions[querypos] =
	Indexdb_ptr(&(this->minus_positions_end5[mod]),/*minus_indexdb*/indexdb_rev,revcomp_oligo);
#endif
      this->minus_positions[querypos] = this->minus_positions_end5[mod];
      this->minus_retrievedp[querypos] = true;
      debug2(printf("(2) minus_npositions_end5[%d] = %d, oligo %016lX\n",
		    mod,this->minus_npositions_end5[mod],revcomp_oligo));
      
      debug2(printf("5' end: %s %s: %d plus positions, %d minus positions, genestrand %d\n",
		    Oligo_one_nt(forward_oligo,index1part),Oligo_one_nt(revcomp_oligo,index1part),
		    this->plus_npositions_end5[mod],this->minus_npositions_end5[mod],genestrand));
    }

    mod++;
  }

  /* The reader reported DONE before all mods were filled.  Record the
     remaining ones as empty. */
  while (mod < index1interval) {
    debug2(printf("Skipping positions_end5 %d, because last_state was DONE\n",mod));
#ifdef LARGE_GENOMES
    this->plus_positions_high_end5[mod] = this->plus_positions_high[mod] = (unsigned char *) NULL;
    this->minus_positions_high_end5[mod] = this->minus_positions_high[mod] = (unsigned char *) NULL;
#endif
    /* this->validp[mod] = false; -- default value */
    this->plus_positions_end5[mod] = this->plus_positions[mod] = (UINT4 *) NULL;
    this->minus_positions_end5[mod] = this->minus_positions[mod] = (UINT4 *) NULL;
    this->plus_npositions_end5[mod] = this->plus_npositions[mod] = 0;
    this->minus_npositions_end5[mod] = this->minus_npositions[mod] = 0;
    this->plus_retrievedp[mod] = true;
    this->minus_retrievedp[mod] = true;
    mod++;
  }

#if 0
  Reader_free(&reader);
  reader = Reader_new(queryuc_ptr,/*querystart*/0,/*queryend*/querylength,/*oligosize*/index1part);
#else
  /* Avoids freeing and allocating the same object */
  Reader_reset_ends(reader);
#endif

  /* mod here is relative to query_lastpos.  Kmer_search procedures
     need to find correspondences between mod5 and mod3 */
  query_lastpos = querylength - index1part;
  last_state = INIT;
  forward = revcomp = 0;
  mod = 0;
  /* 3' end: same scheme as above, reading from the other end.  Note
     that the shift is applied to forward here and not to revcomp, the
     reverse of the 5' case. */
  while (mod < index1interval &&
	 (last_state = Oligo_next_3(last_state,&querypos,&forward,&revcomp,reader,genestrand)) != DONE) {
    /* NOTE(review): unlike the other three skip loops in this
       procedure, the LARGE_GENOMES branch here does not clear
       plus/minus_positions_high[query_lastpos - mod].  Harmless if
       the array is still zeroed from Stage1_new -- confirm that this
       procedure is never re-run on a used object. */
    while (mod < index1interval && mod < query_lastpos - querypos) {
      debug2(printf("Skipping positions_end3 %d, because querypos is not the expected one\n",mod));
#ifdef LARGE_GENOMES
      this->plus_positions_high_end3[mod] = (unsigned char *) NULL;
      this->minus_positions_high_end3[mod] = (unsigned char *) NULL;
#endif
      this->plus_positions_end3[mod] = this->plus_positions[query_lastpos - mod] = (UINT4 *) NULL;
      this->minus_positions_end3[mod] = this->minus_positions[query_lastpos - mod] = (UINT4 *) NULL;
      this->plus_npositions_end3[mod] = this->plus_npositions[query_lastpos - mod] = 0;
      this->minus_npositions_end3[mod] = this->minus_npositions[query_lastpos - mod] = 0;
      this->plus_retrievedp[query_lastpos - mod] = true;
      this->minus_retrievedp[query_lastpos - mod] = true;
      mod++;
    }

    if (mod < index1interval) {
      this->validp[querypos] = true;

      forward_oligo = this->forward_oligos[querypos] = (forward >> leftreadshift) & oligobase_mask;
      this->plus_diagterms_end3[mod] = querylength - querypos;
#ifdef LARGE_GENOMES
      this->plus_npositions_end3[mod] = this->plus_npositions[querypos] =
	Indexdb_largeptr(&(this->plus_positions_high_end3[mod]),&(this->plus_positions_end3[mod]),
			 /*plus_indexdb*/indexdb_fwd,forward_oligo);
      this->plus_positions_high[querypos] = this->plus_positions_high_end3[mod];
#else
      this->plus_npositions_end3[mod] = this->plus_npositions[querypos] =
	Indexdb_ptr(&(this->plus_positions_end3[mod]),/*plus_indexdb*/indexdb_fwd,forward_oligo);
#endif
      this->plus_positions[querypos] = this->plus_positions_end3[mod];
      this->plus_retrievedp[querypos] = true;
      debug2(printf("(3) plus_npositions_end3[%d] = %d, oligo %016lX\n",
		    mod,this->plus_npositions_end3[mod],forward_oligo));
      
      revcomp_oligo = this->revcomp_oligos[querypos] = revcomp & oligobase_mask;
      this->minus_diagterms_end3[mod] = querypos + index1part;
#ifdef LARGE_GENOMES
      this->minus_npositions_end3[mod] = this->minus_npositions[querypos] =
	Indexdb_largeptr(&(this->minus_positions_high_end3[mod]),&(this->minus_positions_end3[mod]),
			 /*minus_indexdb*/indexdb_rev,revcomp_oligo);
      this->minus_positions_high[querypos] = this->minus_positions_high_end3[mod];
#else
      this->minus_npositions_end3[mod] = this->minus_npositions[querypos] =
	Indexdb_ptr(&(this->minus_positions_end3[mod]),/*minus_indexdb*/indexdb_rev,revcomp_oligo);
#endif
      this->minus_positions[querypos] = this->minus_positions_end3[mod];
      this->minus_retrievedp[querypos] = true;
      debug2(printf("(4) minus_npositions_end3[%d] = %d, oligo %016lX\n",mod,this->minus_npositions_end3[mod],revcomp_oligo));

      debug2(printf("3' end: %s %s: %d plus positions, %d minus positions, genestrand %d\n",
		    Oligo_one_nt(forward_oligo,index1part),Oligo_one_nt(revcomp_oligo,index1part),
		    this->plus_npositions_end3[mod],this->minus_npositions_end3[mod],genestrand));
    }

    mod++;
  }

  /* The reader reported DONE before all mods were filled.  Record the
     remaining ones as empty. */
  while (mod < index1interval) {
    /* printf("Skipping positions_3 %d, because last_state was DONE\n",mod); */
#ifdef LARGE_GENOMES
    this->plus_positions_high_end3[mod] = this->plus_positions_high[query_lastpos - mod] = (unsigned char *) NULL;
    this->minus_positions_high_end3[mod] = this->minus_positions_high[query_lastpos - mod] = (unsigned char *) NULL;
#endif
    this->plus_positions_end3[mod] = this->plus_positions[query_lastpos - mod] = (UINT4 *) NULL;
    this->minus_positions_end3[mod] = this->minus_positions[query_lastpos - mod] = (UINT4 *) NULL;
    this->plus_npositions_end3[mod] = this->plus_npositions[query_lastpos - mod] = 0;
    this->minus_npositions_end3[mod] = this->minus_npositions[query_lastpos - mod] = 0;
    this->plus_retrievedp[query_lastpos - mod] = true;
    this->minus_retrievedp[query_lastpos - mod] = true;
    mod++;
  }
  Reader_free(&reader);

  return;
}


#if 0
/* Puts values of positions_end5 and positions_end3 from
   Kmer_search_exact into positions for other methods */
/* Now performed within Stage1_init */
/* Disabled code, kept for reference only.  NOTE(review): it refers to
   this->plus_validp and this->minus_validp, whereas the live code in
   this file uses a single this->validp array, so it would need
   updating before it could be re-enabled. */
void
Stage1_integrate_end_positions (T this, int querylength) {
  int query_lastpos = querylength - index1part;
  int mod;

  for (mod = 0; mod < index1interval; mod++) {
    /* querypos = mod; */
    this->plus_validp[mod] = true;
    this->plus_retrievedp[mod] = true;
#ifdef LARGE_GENOMES
    this->plus_positions_high[mod] = this->plus_positions_high_end5[mod];
#endif
    this->plus_positions[mod] = this->plus_positions_end5[mod];
    this->plus_npositions[mod] = this->plus_npositions_end5[mod];

    this->plus_validp[query_lastpos-mod] = true;
    this->plus_retrievedp[query_lastpos-mod] = true;
#ifdef LARGE_GENOMES
    this->plus_positions_high[query_lastpos-mod] = this->plus_positions_high_end3[mod];
#endif
    this->plus_positions[query_lastpos-mod] = this->plus_positions_end3[mod];
    this->plus_npositions[query_lastpos-mod] = this->plus_npositions_end3[mod];

    /* Using new sarray and segment-based conventions */
    this->minus_validp[query_lastpos-mod] = true;
    this->minus_retrievedp[query_lastpos-mod] = true;
#ifdef LARGE_GENOMES
    this->minus_positions_high[query_lastpos-mod] = this->minus_positions_high_end5[mod];
#endif
    this->minus_positions[query_lastpos-mod] = this->minus_positions_end5[mod];
    this->minus_npositions[query_lastpos-mod] = this->minus_npositions_end5[mod];

    this->minus_validp[mod] = true;
    this->minus_retrievedp[mod] = true;
#ifdef LARGE_GENOMES
    this->minus_positions_high[mod] = this->minus_positions_high_end3[mod];
#endif
    this->minus_positions[mod] = this->minus_positions_end3[mod];
    this->minus_npositions[mod] = this->minus_npositions_end3[mod];

#if 0
    printf("Initializing plus_positions[%d] to be %p, with count of %d\n",
	   mod,this->plus_positions[mod],this->plus_npositions[mod]);
    printf("Initializing plus_positions[%d] to be %p, with count of %d\n",
	 query_lastpos-mod,this->plus_positions[query_lastpos-mod],this->plus_npositions[query_lastpos-mod]);

    printf("Initializing minus_positions[%d] to be %p, with count of %d\n",
	   mod,this->minus_positions[mod],this->minus_npositions[mod]);
    printf("Initializing minus_positions[%d] to be %p, with count of %d\n",
	 query_lastpos-mod,this->minus_positions[query_lastpos-mod],this->minus_npositions[query_lastpos-mod]);
#endif
  }

  /* Stage1_dump(this,querylength); */

  return;
}
#endif


/* Computes the forward and revcomp oligos for every query position and
   stores them in this->forward_oligos and this->revcomp_oligos.  A
   position is marked valid only the first time its forward oligo
   occurs in the query; later occurrences of the same oligo are marked
   invalid, which is how repetitive sequence is handled.  Positions for
   which the reader does not report VALID are left untouched.

   this:         Stage1_T whose oligo and validp arrays are filled
   queryuc_ptr:  query sequence, handed to Reader_new
   querylength:  length of the query
   genestrand:   passed through to Oligo_next_5 */
/* Instead of Univcoordtable, we need something big enough to store an Oligospace_T object */
void
Stage1_fill_all_oligos (T this, char *queryuc_ptr, int querylength, int genestrand) {
  Oligostate_T state = INIT;
  Oligospace_T fwd_bits = 0, rc_bits = 0, fwd_oligo;
  int querypos = 0;
  Univcoordtable_T seen_oligos = Univcoordtable_new(/*hint*/querylength);
  /* query_lastpos = querylength - index1part; */
  Reader_T reader = Reader_new(queryuc_ptr,/*querystart*/0,/*queryend*/querylength,/*oligosize*/index1part);

  /* Note: leftshifting is done here, rather than in Oligo_lookup */
  /* Format is 010llX because 19-mer is maximum k-mer size, which would require 10 chars */
  /* debug(printf("oligobase_mask: %010llX\n",oligobase_mask)); */
  for (;;) {
    state = Oligo_next_5(state,&querypos,&fwd_bits,&rc_bits,reader,genestrand);
    if (state == DONE) {
      break;
    }

    if (state != VALID) {
      /* querypos is not defined when last_state != VALID */
      debug8(printf("oligo at querypos %d is not valid\n",querypos));
      continue;
    }

    /* querypos_rc = query_lastpos - querypos; */
    /* Previously assigned revcomp oligo to minus_oligos[querypos_rc] */
    fwd_oligo = fwd_bits & oligobase_mask;
    this->forward_oligos[querypos] = fwd_oligo;
    this->revcomp_oligos[querypos] = (rc_bits >> leftreadshift) & oligobase_mask;
    debug8(printf("Putting forward oligo %016lX and revcomp oligo %016lX at querypos %d\n",
                  this->forward_oligos[querypos],this->revcomp_oligos[querypos],querypos));

    if (Univcoordtable_get(seen_oligos,fwd_oligo) == NULL) {
      /* First occurrence of this oligo in the query */
      this->validp[querypos] = true;
      Univcoordtable_put(seen_oligos,fwd_oligo,(void *) true);
    } else {
      /* Handling repetitive sequences */
      debug8(printf("oligo at plus %d already seen, so marking as invalid\n",querypos));
      this->validp[querypos] = false;
    }
  }

  Reader_free(&reader);

  Univcoordtable_free(&seen_oligos);

  return;
}


/* Retrieves index positions for every valid query position that has
   not been retrieved yet, on both strands, and marks each one as
   retrieved.

   this:         Stage1_T whose forward_oligos, revcomp_oligos and
                 validp have been filled in (by Stage1_fill_all_oligos)
   querylength:  length of the query
   genestrand:   when +2, plus oligos are looked up in indexdb_rev and
                 minus oligos in indexdb_fwd; otherwise the reverse

   Query positions run from 0 through query_lastpos = querylength -
   index1part inclusive, since an oligo of index1part bases can still
   start at query_lastpos.  The loop previously stopped one short of
   query_lastpos, unlike Stage1_dump and Stage1_init, so the final
   oligo was never looked up here unless Stage1_init had already
   retrieved it. */
void
Stage1_fill_all_positions (T this, int querylength, int genestrand) {
  int query_lastpos, querypos;
  Indexdb_T plus_indexdb, minus_indexdb;

  if (genestrand == +2) {
    plus_indexdb = indexdb_rev;
    minus_indexdb = indexdb_fwd;
  } else {
    plus_indexdb = indexdb_fwd;
    minus_indexdb = indexdb_rev;
  }

  query_lastpos = querylength - index1part;

  /* Assumes that forward_oligos and revcomp_oligos have been filled
     in (by Stage1_fill_all_oligos) */
  /* Format is 010llX because 19-mer is maximum k-mer size, which would require 10 chars */
  /* debug(printf("oligobase_mask: %010llX\n",oligobase_mask)); */
  for (querypos = 0; querypos <= query_lastpos; querypos++) {
    if (this->validp[querypos] == false) {
      /* Forward and revcomp oligos not valid */
      continue;
    }

    if (this->plus_retrievedp[querypos] != true) {
#ifdef LARGE_GENOMES
      this->plus_npositions[querypos] =
        Indexdb_largeptr(&this->plus_positions_high[querypos],&this->plus_positions[querypos],
                         plus_indexdb,this->forward_oligos[querypos]);
#else
      this->plus_npositions[querypos] =
        Indexdb_ptr(&this->plus_positions[querypos],plus_indexdb,this->forward_oligos[querypos]);
#endif
      this->plus_retrievedp[querypos] = true;
    }

    if (this->minus_retrievedp[querypos] != true) {
#ifdef LARGE_GENOMES
      this->minus_npositions[querypos] =
        Indexdb_largeptr(&this->minus_positions_high[querypos],&this->minus_positions[querypos],
                         minus_indexdb,this->revcomp_oligos[querypos]);
#else
      this->minus_npositions[querypos] =
        Indexdb_ptr(&this->minus_positions[querypos],minus_indexdb,this->revcomp_oligos[querypos]);
#endif
      this->minus_retrievedp[querypos] = true;
    }
  }

  return;
}


#ifdef DEBUG1
/* Prints every path in list, each prefixed by the destination label,
   and asserts that each path's sensedir equals expected_sensedir.
   NOTE(review): expected_sensedir is declared bool but callers pass
   SENSE_FORWARD / SENSE_ANTI; confirm those values survive conversion
   to the project's bool type. */
static void
list_paths (List_T list, char *destination, bool expected_sensedir) {
  List_T cell = list;
  Path_T path;

  while (cell != NULL) {
    path = (Path_T) List_head(cell);
    printf("Destination %s: ",destination);
    Path_print(path);
    assert(path->sensedir == expected_sensedir);
    cell = List_next(cell);
  }

  return;
}


/* Debugging aid, compiled only under DEBUG1.  Prints every path list
   held by this (unsolved, unextended, extended and final; sense and
   antisense; gplus and gminus), checking along the way that each path
   has the sense direction its list implies. */
void
Stage1_list_paths (T this) {
  struct {
    List_T paths;
    char *destination;
    bool expected_sensedir;
  } groups[] = {
    {this->unsolved_sense_paths_gplus,"unsolved_sense_paths_gplus",SENSE_FORWARD},
    {this->unsolved_sense_paths_gminus,"unsolved_sense_paths_gminus",SENSE_FORWARD},
    {this->unsolved_antisense_paths_gplus,"unsolved_antisense_paths_gplus",SENSE_ANTI},
    {this->unsolved_antisense_paths_gminus,"unsolved_antisense_paths_gminus",SENSE_ANTI},

    {this->unextended_sense_paths_gplus,"unextended_sense_paths_gplus",SENSE_FORWARD},
    {this->unextended_sense_paths_gminus,"unextended_sense_paths_gminus",SENSE_FORWARD},
    {this->unextended_antisense_paths_gplus,"unextended_antisense_paths_gplus",SENSE_ANTI},
    {this->unextended_antisense_paths_gminus,"unextended_antisense_paths_gminus",SENSE_ANTI},

    {this->extended_sense_paths_gplus,"extended_sense_paths_gplus",SENSE_FORWARD},
    {this->extended_sense_paths_gminus,"extended_sense_paths_gminus",SENSE_FORWARD},
    {this->extended_antisense_paths_gplus,"extended_antisense_paths_gplus",SENSE_ANTI},
    {this->extended_antisense_paths_gminus,"extended_antisense_paths_gminus",SENSE_ANTI},

    {this->sense_paths_gplus,"sense_paths_gplus",SENSE_FORWARD},
    {this->sense_paths_gminus,"sense_paths_gminus",SENSE_FORWARD},
    {this->antisense_paths_gplus,"antisense_paths_gplus",SENSE_ANTI},
    {this->antisense_paths_gminus,"antisense_paths_gminus",SENSE_ANTI}
  };
  const int ngroups = (int) (sizeof(groups)/sizeof(groups[0]));
  int g;

  printf("Dump of paths\n");
  for (g = 0; g < ngroups; g++) {
    list_paths(groups[g].paths,groups[g].destination,
               /*expected_sensedir*/groups[g].expected_sensedir);
  }

  return;
}
#endif


/* Releases a Stage1_T and everything it owns, then frees *old itself.
   Does nothing when *old is NULL.

   old:           address of the object to free
   trdiagpool, univdiagpool, intlistpool, univcoordlistpool, listpool,
   pathpool, transcriptpool, hitlistpool:
                  pools handed to Path_gc, Elt_gc and Tr_elt_gc
   free_paths_p:  when true, the unextended, extended and final path
                  lists are freed as well; the unsolved path lists are
                  always freed

   The DEBUG1 path dump now runs after the NULL check.  It used to run
   before it, so a DEBUG1 build dereferenced a NULL object whenever
   *old was NULL. */
void
Stage1_free (T *old, Trdiagpool_T trdiagpool, Univdiagpool_T univdiagpool,
	     Intlistpool_T intlistpool, Univcoordlistpool_T univcoordlistpool,
	     Listpool_T listpool, Pathpool_T pathpool, Transcriptpool_T transcriptpool,
	     Hitlistpool_T hitlistpool, bool free_paths_p) {
  T this;

  if (*old == NULL) {
    return;
  }
  this = *old;

  /* Stage1hr_check(*old); */
  debug1(Stage1_list_paths(this));

  Path_gc(&this->unsolved_sense_paths_gplus,
          intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  Path_gc(&this->unsolved_sense_paths_gminus,
          intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  Path_gc(&this->unsolved_antisense_paths_gplus,
          intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  Path_gc(&this->unsolved_antisense_paths_gminus,
          intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);

  if (free_paths_p == true) {
    Path_gc(&this->unextended_sense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->unextended_sense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->unextended_antisense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->unextended_antisense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);

    Path_gc(&this->extended_sense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->extended_sense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->extended_antisense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->extended_antisense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);

    Path_gc(&this->sense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->sense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->antisense_paths_gplus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
    Path_gc(&this->antisense_paths_gminus,
            intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  }

  /* The entries of plus_positions / minus_positions (and their _high
     counterparts) now point to the index data structure rather than
     holding copied values, so they are not freed individually; only
     the pointer arrays themselves are released below. */

  /* FREE(this->mismatch_positions_alloc); */
  FREE(this->positions_alloc);
  Mergeinfo_uint4_free(&this->mergeinfo_tr);
#ifdef LARGE_GENOMES
  Mergeinfo_uint8_free(&this->mergeinfo);
#else
  Mergeinfo_uint4_free(&this->mergeinfo);
#endif
  Spliceinfo_free(&this->spliceinfo);
  Indelinfo_free(&this->indelinfo);

#ifdef LARGE_GENOMES
  FREE(this->stream_high_alloc);
  FREE(this->stream_low_alloc);
#endif
  FREE_ALIGN(this->streamspace_alloc);
  FREE(this->streamptr_alloc);
  FREE(this->streamsize_alloc);
  FREE(this->querypos_diagterm_alloc);

#ifdef LARGE_GENOMES
  FREE(this->positions_high_allocated);
#endif
  FREE(this->positions_allocated);
  FREE(this->npositions_allocated);

  FREE(this->retrievedp_allocated);

  FREE(this->revcomp_oligos);
  FREE(this->forward_oligos);

  FREE(this->validp);

  Elt_gc(&this->queryfwd_plus_set,listpool,univdiagpool);
  Elt_gc(&this->queryfwd_minus_set,listpool,univdiagpool);
  Elt_gc(&this->queryrev_plus_set,listpool,univdiagpool);
  Elt_gc(&this->queryrev_minus_set,listpool,univdiagpool);

  Tr_elt_gc(&this->tr_queryfwd_plus_set,listpool,trdiagpool);
  Tr_elt_gc(&this->tr_queryfwd_minus_set,listpool,trdiagpool);
  Tr_elt_gc(&this->tr_queryrev_plus_set,listpool,trdiagpool);
  Tr_elt_gc(&this->tr_queryrev_minus_set,listpool,trdiagpool);

  /* Free through *old, not the local alias, so that whatever FREE
     does to its argument applies to the caller's pointer */
  FREE(*old);

  return;
}


/* Stores the index databases, oligo parameters, splice distance and
   transcriptome in the file-scope variables used by the other
   procedures in this file, and derives leftreadshift and
   oligobase_mask from index1part.

   indexdb_fwd_in, indexdb_rev_in, indexdb_tr_in:  index databases
   index1part_tr_in, index1part_in:  oligo sizes
   index1interval_in:                sampling interval
   shortsplicedist_novelend_in:      used to size the stream space
   transcriptome_in:                 may be NULL */
void
Stage1hr_setup (Indexdb_T indexdb_fwd_in, Indexdb_T indexdb_rev_in, Indexdb_T indexdb_tr_in,
		int index1part_tr_in, int index1part_in, int index1interval_in, 
		Chrpos_T shortsplicedist_novelend_in, Transcriptome_T transcriptome_in) {

  /* Index databases */
  indexdb_fwd = indexdb_fwd_in;
  indexdb_rev = indexdb_rev_in;
  indexdb_tr = indexdb_tr_in;

  /* Oligo geometry */
  index1part_tr = index1part_tr_in;
  index1part = index1part_in;
  index1interval = index1interval_in;

  /* An oligo occupies 2*index1part bits.  leftreadshift is what
     remains of the word width; the mask keeps the low 2*index1part
     bits and is the same for either word width. */
#ifdef HAVE_64_BIT
  leftreadshift = 64 - 2*index1part;
#else
  leftreadshift = 32 - 2*index1part;
#endif
  oligobase_mask = ~(~ (Oligospace_T) 0 << 2*index1part);

  /* Remaining settings */
  shortsplicedist_novelend = shortsplicedist_novelend_in;
  transcriptome = transcriptome_in;

  return;
}
