// Logo Search packages:
// Sourcecode: epcr version File versions  Download package
//
// fasta-io.h

///////////////////////////////////////////////////////////////////
// $Id: fasta-io.h,v 1.1 2003/03/31 21:08:27 rotmistr Exp $
///////////////////////////////////////////////////////////////////
#ifndef __fasta_io_h__
#define __fasta_io_h__

#include "util.h"

// Line-width settings for sequence output, counted in sequence characters
// per output line.  SEQLINE_LEN_DEFAULT is the default value of the
// `linelen` parameter of WriteSeqLines(), declared later in this header.
// NOTE(review): where the MIN/MAX bounds are enforced is not visible in
// this header -- confirm in the implementation.
#define SEQLINE_LEN_DEFAULT 60   // default sequence characters per line
#define SEQLINE_LEN_MAX    120   // max sequence characters per line
#define SEQLINE_LEN_MIN     10   // min sequence characters per line

// Character arrays defined in another translation unit.
// NOTE(review): judging by the names these are presumably the sets of
// valid amino-acid (Aa) and nucleotide (Nt) characters -- confirm at the
// point of definition.
extern char chrValidAa[];
extern char chrValidNt[];

// Integer codes identifying a sequence type.
// NOTE(review): presumably the values expected for the `seqtype` argument
// of FastaSeq::ParseText() and the FastaFile(int) constructor -- confirm.
#define SEQTYPE_AA 1
#define SEQTYPE_NT 2


// Helpers that take a seqid string plus a caller-supplied character buffer.
// NOTE(review): presumably each one writes the accession / name component
// of `seqid` into the buffer and returns a pointer to the result; the
// required buffer size and the failure behaviour are not visible in this
// header -- confirm in the implementation.
char * ExtractSeqAcc (const char *seqid, char *acc);
char * ExtractSeqName (const char *seqid, char *name);



//#define GetDefline Defline
//#define GetSequence Sequence

// Forward declaration of the linked-list class.  FastaSeq only stores a
// pointer to it, so the complete type is not required in this header.
class LList;



// One FASTA record.  The data members hold the seqid string, the accession
// extracted from it, the definition line and the sequence text.
class FastaSeq
{
public:
    FastaSeq();
    FastaSeq(char *def, char *seq);
    ~FastaSeq();

    void Clear();

    // Two ways to parse a record from text: with an explicit alphabet
    // string, or with a sequence-type code.
    // NOTE(review): `seqtype` is presumably SEQTYPE_AA or SEQTYPE_NT and
    // `upper` presumably requests upper-casing -- confirm in the
    // implementation; the meaning of the int result is not visible here.
    int ParseText(const char *text, int upper, const char *alphabet);
    int ParseText(const char *text, int upper, int seqtype);

    // Returns the stored sequence length (m_len).
    int SeqLength() const
    {
        return m_len;
    }

    // Returns the stored seqid string (m_tag).
    const char *SeqIdent() const
    {
        return m_tag;
    }

    const char *Title() const;
    const char *Defline() const;
    const char *Sequence() const;


    // Legacy accessors, retained only so that older callers keep working.
    int Length() const;
    const char *Accession() const;
    const char *Label() const;


protected:
    char  *m_tag;       // full seqid string, e.g. "gi|12345|gb|AC001234.1|HS012345"
    char  *m_acc;       // accession number taken out of the seqid
    char  *m_def;       // definition line, without the seqid
    char  *m_seq;       // the sequence itself, stored as a string
    int    m_len;       // length of the sequence
    void  *m_bogus;     // purpose is not documented in this header
    LList *m_segments;  // linked list; contents are not documented in this header

    void SetDefline(char *string);
    void SetSequence(char *string);

    // FastaFile is granted access to the protected members above.
    friend class FastaFile;
};


// Reads and writes FastaSeq records through a C stdio stream (FILE *).
// NOTE(review): FILE, fpos_t and NULL are not declared by this header
// itself; presumably they arrive through "util.h" -- confirm.
class FastaFile
{
public:
    FastaFile();
    FastaFile(int seqtype);
    ~FastaFile();

    bool Open(const char *fname, const char *fmode);
    bool Close();

    // True while a stream is held, i.e. m_file is not null.
    bool IsOpen() const
    {
        return m_file != NULL;
    }

    // Attach/Detach exchange an already existing FILE* with the object.
    // NOTE(review): ownership rules and the meaning of Attach's int result
    // are not visible in this header -- confirm in the implementation.
    int   Attach(FILE *file);
    FILE *Detach();

    bool Read(FastaSeq &seq);
    bool Read(FastaSeq *&seq);
    bool Write(FastaSeq &seq);

    int SetFilePos(fpos_t &fpos);
    int GetFilePos(fpos_t &fpos);

//  long Tell ();
//  int  Seek (long offset, int whence);

    // Stores `flag` in m_toupper.
    void ToUpperMode(int flag)
    {
        m_toupper = flag;
    }

    int BuildSeqIndex();
    int GetSeqOffset(const char *acc, fpos_t &offset);

protected:
    char *m_file_name;
    FILE *m_file;       // stream in use; null means "not open" (see IsOpen)
    int   m_line_num;
    int   m_seqtype;
    int   m_toupper;    // value set through ToUpperMode()
    void *m_index;
    int   m_count;
};


// Writes sequence text to the stream `fd`.
//   seq     - sequence characters
//   len     - number of characters in `seq`
//   linelen - sequence characters per output line; defaults to
//             SEQLINE_LEN_DEFAULT when omitted
// NOTE(review): the meaning of the int result, and whether `linelen` is
// clamped to SEQLINE_LEN_MIN..SEQLINE_LEN_MAX, is not visible in this
// header -- confirm in the implementation.
extern int WriteSeqLines (FILE *fd, const char *seq, int len, int linelen=SEQLINE_LEN_DEFAULT);


// NOTE(review): presumably revcomp() reverse-complements the `length`
// characters of `s` in place (the buffer is non-const and nothing is
// returned), and init_revcomp() performs one-time setup that must happen
// before the first revcomp() call -- confirm in the implementation.
extern void init_revcomp();
extern void revcomp(char *s, int length);

// Takes `len` characters of `seq` and a hash value passed by non-const
// reference, so the function is able to modify `hash`.
// NOTE(review): presumably the computed hash is delivered through `hash`
// and the int result is a status code -- confirm in the implementation.
int seq_hash (long &hash, const char *seq, int len);

#endif
/*
 * $Log: fasta-io.h,v $
 * Revision 1.1  2003/03/31 21:08:27  rotmistr
 * Imported to CVS
 * Compilable with gcc
 *
 */

// Generated by  Doxygen 1.6.0   Back to index