Thursday, October 4, 2018

ATCG Content of Multiple Gene Sequences using C++

ATCG Content Output

This is a simple tutorial on computing the A, T, C, and G content of multiple gene sequences using classes, objects, and member functions in C++. The program in this tutorial was compiled with Dev-C++ v5.11.

Program Design

A. Define a class to represent gene sequence data. Include the following members:

  1. Data members:
    • Gene name
    • Gene ID
    • Length
    • A, T, G, C content
  2. Member functions:
    • To read data for a gene
    • To compute A, T, G, C content
    • To display all the details of a gene
B. Write a main program to test the class by reading data for n gene sequences.

Source Code

/* Computing ATCG Content of Multiple Gene Sequences */
#include <conio.h>
#include <stdlib.h>
#include <string.h>

#include <iomanip>
#include <iostream>
#include <string>

/*
 * Stores a small table of gene records (name, ID, sequence) together with
 * the working state that the three member functions share between calls.
 */
class gene {
  private:
    char r;                   // single-character y/n reply read in find_length()
    char gene_name[20][20];   // gene names: 20 slots of 20 chars each
    char gene_seq[20][200];   // gene sequences: 20 slots of 200 chars each
    char gene_id[20][20];     // gene IDs: 20 slots of 20 chars each
    char id[20];              // gene ID typed in for the lookup

    int gene_length;          // length of the sequence selected by find_length()
    int a;                    // count of 'A' in the selected sequence
    int t;                    // count of 'T' in the selected sequence
    int c;                    // count of 'C' in the selected sequence
    int g;                    // count of 'G' in the selected sequence
    int o;                    // count of every other character (printed as "X")
    int n;                    // number of genes entered in get_sequence()
    int i;                    // scratch loop counter
    int j;                    // scratch loop counter
    int m;                    // number of ID matches found by find_length()
    int substr_eq;            // gene_length / 50, adjusted for row printing
    int substr_rem;           // gene_length % 50

    std::string seq;          // selected sequence, consumed by find_atcg()
    std::string s;            // working copy of the sequence being printed

  public:
    void get_sequence();      // read all gene records from standard input
    void find_length();       // look a gene up by ID and print sequence + length
    void find_atcg();         // count and print the A/T/C/G/other totals
};

/*
 * Reads one whitespace-delimited token from std::cin into dest, which has
 * room for `capacity` bytes including the terminating NUL. Tokens that do
 * not fit are truncated (with a warning) instead of overrunning the buffer.
 * Terminates the program if the input stream is exhausted or broken.
 */
static void read_bounded_token(char* dest, std::size_t capacity) {
  std::string token;
  if (!(std::cin >> token)) {
    std::cerr << "\n\nInput ended unexpectedly.\n";
    exit(EXIT_FAILURE);
  }
  if (token.size() >= capacity) {
    std::cout << "[Warning] Input longer than " << capacity - 1
              << " characters was truncated.\n";
    token.resize(capacity - 1);
  }
  memcpy(dest, token.c_str(), token.size() + 1);  // +1 copies the NUL
}

/*
 * Clears the screen, prints the banner, asks how many genes will be entered
 * and then reads a name, an ID and a sequence for each of them. Sequences are
 * converted to upper case so that find_atcg() can compare against 'A', 'T',
 * 'C' and 'G' directly.
 *
 * The gene count is re-prompted until it is a whole number between 1 and the
 * number of slots in the storage arrays; every field is read through
 * read_bounded_token() so that over-long input cannot overflow its buffer.
 */
void gene::get_sequence() {
  system("cls");
  std::cout << "\n+---------------------------------------------------+";
  std::cout << "\n| Computing ATCG Content of Multiple Gene Sequences |";
  std::cout << "\n+---------------------------------------------------+\n";

  // Capacity is derived from the array itself so the two cannot drift apart.
  const int max_genes = static_cast<int>(sizeof gene_name / sizeof gene_name[0]);

  for (;;) {
    std::cout << "\nHow many sequence? ";
    std::string count_text;
    if (!(std::cin >> count_text)) {
      std::cerr << "\n\nInput ended unexpectedly.\n";
      exit(EXIT_FAILURE);
    }
    // Parse via strtol so that non-numeric or out-of-range text is rejected
    // cleanly instead of leaving std::cin in a failed state.
    char* end = NULL;
    const long requested = strtol(count_text.c_str(), &end, 10);
    if (end != count_text.c_str() && *end == '\0' &&
        requested >= 1 && requested <= max_genes) {
      n = static_cast<int>(requested);
      break;
    }
    std::cout << "\nPlease enter a whole number from 1 to " << max_genes << ".\n";
  }

  for (i = 0; i < n; i++) {
    std::cout << "\nEnter the Details of Gene " << i+1 << ":\n\n";
    std::cout << "Enter the Gene Name: ";
    read_bounded_token(gene_name[i], sizeof gene_name[i]);
    std::cout << "Enter the Gene ID: ";
    read_bounded_token(gene_id[i], sizeof gene_id[i]);
    std::cout << "Enter the Gene Sequence: ";
    read_bounded_token(gene_seq[i], sizeof gene_seq[i]);
    strupr(gene_seq[i]);
  }
}
  
/*
 * Prompts for a gene ID, searches the stored genes for it (case-insensitive)
 * and, on the first match, prints the gene name, the sequence laid out in
 * rows of 50 characters split into groups of 10, and the sequence length.
 * The matched sequence and its length are left in `seq` / `gene_length` for
 * find_atcg().
 *
 * If no gene matches, the user is asked whether to try again; any answer
 * other than 'y'/'Y' (or a failed read) waits for a key press and exits.
 * The program also exits if the ID itself cannot be read.
 */
void gene::find_length() {
  for (;;) {
    system("cls");
    m = 0;
    std::cout << "\n+---------------------------------------------------+";
    std::cout << "\n| Computing ATCG Content of Multiple Gene Sequences |";
    std::cout << "\n+---------------------------------------------------+\n";
    std::cout << "\nEnter the Gene ID to Compute: ";

    // Read into a std::string first so an over-long ID cannot overflow `id`.
    std::string query;
    if (!(std::cin >> query)) {
      std::cerr << "\n\nInput ended unexpectedly.\n";
      exit(EXIT_FAILURE);
    }
    // An ID that does not fit in `id` is treated as not found; comparing a
    // truncated copy could produce a false match.
    const bool fits = query.size() < sizeof id;
    strncpy(id, query.c_str(), sizeof id - 1);
    id[sizeof id - 1] = '\0';

    // The search index is a local: the row-printing loops below must not
    // disturb it (sharing one counter made the search skip or repeat genes).
    for (int idx = 0; fits && idx < n; ++idx) {
      if (strcmpi(id, gene_id[idx]) != 0) {
        continue;
      }
      gene_length = static_cast<int>(strlen(gene_seq[idx]));
      std::cout << "\n\nSequence Name: " << gene_name[idx];
      s = gene_seq[idx];

      // substr_eq is the index of the last 50-character row to print.
      substr_eq = gene_length / 50;
      substr_rem = gene_length % 50;
      if (substr_rem == 0) substr_eq--;

      std::cout << "\n\n\nGene Sequence: \n\n\t";
      for (int row = 0; row <= substr_eq; ++row) {
        for (int pos = row * 50; pos < gene_length && pos < (row + 1) * 50; ++pos) {
          std::cout << s[pos];
          if ((pos + 1) % 10 == 0) {
            std::cout << " ";  // gap after every group of 10 bases
          }
        }
        std::cout << "\n\t";
      }
      seq = s;
      std::cout << "\n\nLength of the Sequence: " << gene_length;
      m++;
      break;  // report the first matching gene only
    }

    if (m != 0) {
      return;
    }

    std::cout << "\n\nThe Gene ID Does Not Exists!";
    std::cout << "\n\nDo you want to try again (y/n)? ";
    if (!(std::cin >> r) || (r != 'y' && r != 'Y')) {
      getch();
      exit(0);
    }
  }
}
  
/*
 * Counts the occurrences of 'A', 'T', 'C' and 'G' in the first gene_length
 * characters of `seq`, lumping every other character into a fifth bucket,
 * and prints the five totals (the last one labelled "X").
 */
void gene::find_atcg() {
  a = 0;
  t = 0;
  c = 0;
  g = 0;
  o = 0;

  for (i = 0; i < gene_length; ++i) {
    switch (seq[i]) {
      case 'A': ++a; break;
      case 'T': ++t; break;
      case 'C': ++c; break;
      case 'G': ++g; break;
      default:  ++o; break;  // anything that is not one of the four bases
    }
  }

  // The first value on each row is left-aligned in a 6-wide field so the
  // second column lines up.
  std::cout << "\n\nATCG Content of the Sequence:"
            << "\n\n\tA = " << std::left << std::setw(6) << a << "T = " << t
            << "\n\n\tC = " << std::left << std::setw(6) << c << "G = " << g
            << "\n\n\tX = " << o;
}

/*
 * Program entry point: reads the gene records once, then repeatedly looks a
 * gene up by ID and prints its A/T/C/G content until the user answers
 * anything other than 'y'/'Y'. Waits for a key press before exiting.
 */
int main() {
  gene g;
  g.get_sequence();

  // Defaults to 'n' so that a failed read ends the loop instead of testing
  // an uninitialised character.
  char rp = 'n';
  do {
    g.find_length();
    g.find_atcg();
    std::cout << "\n\nDo you want to try another (y/n)? ";
    if (!(std::cin >> rp)) {
      rp = 'n';
    }
  } while (rp == 'y' || rp == 'Y');

  getch();
  return 0;
}

Input/Output

ATCG Content Input

0 comments :

Post a Comment