mf2sqn

#!/usr/bin/perl -w -- -*-Perl-*-

##############################################################################
#
#                                  mf2sqn
#
# DESCRIPTION:
# mf2sqn - Converts an OGMP masterfile into a Sequin file that can be
# used for sequence submission.
#
##############################################################################

#############################################################################
#                                 mf2sqn                                    #
#                                                                           #
#                          Copyright (C) 2008                               #
#                         Departement de Biochimie,                         #
#                          Universite de Montreal,                          #
#                     C.P. 6128, succursale Centre-ville,                   #
#                      Montreal, Quebec, Canada, H3C 2J7                    #
#                                                                           #
#                Programming:  Natacha Beck.                                #
#                Project management: Franz Lang (OGMP)                      #
#                E-Mail information: Franz.Lang@Umontreal.ca                #
#                                                                           #
#     This software is distributed under the GNU GENERAL PUBLIC LICENSE, as #
# published by the Free Software Foundation. A copy of version 2 of this    #
# license should be included in a file called COPYING. If not, write to the #
# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.   #
#############################################################################

##########################
# Initialization section #
##########################

require 5.00;
require "qualifs.pl";
use strict;
use vars qw( $VERSION $RCS_VERSION );
use PirObject;                                        # Pir module treatment
use IO::File;
use File::Path;
use File::Copy;
use File::Basename;
use Cwd;

BEGIN {
  # Load the "Masterfile" data model through PirObject at compile time;
  # the main program later calls PirObject::Masterfile->ObjectFromMasterfile.
  PirObject->LoadDataModel("Masterfile");               # Masterfile object
} # End Begin

# Default umask: files created by this program are not accessible to "others".
umask 027;

# Program's name and version number (the version is extracted from the RCS id).
$RCS_VERSION='$Id: mf2sqn,v 1.21 2013/10/15 01:05:30 nbeck Exp $';
($VERSION) = ($RCS_VERSION =~ m#,v ([\w\.]+)#);
my ($BASENAME) = ($0 =~ /([^\/]+)$/);

# Get login name.
# '||' is required here: with the low-precedence 'or' the assignment binds
# first, so the getlogin() fallback would be evaluated but never stored.
my $USER = getpwuid($<) || getlogin() || die "Can't find USER from environment!\n";

##################################
# Global variables and constants #
##################################

$|=1;                                 # Unbuffered STDOUT
my $DEBUG=0;                          # Set with -@; keeps the work directory
my $TMPDIR="/tmp/$BASENAME.$$";       # Per-process work directory
my $GENE_NAME={};                     # Filled by ParseGeneList, keyed by lowercase gene name
my $OGMP_QUALS_CLASSES= {};           # The $OGMP_QUALS_* hashes are filled by
my $OGMP_QUALS_TEXTS={};              # ParseQualifList, keyed by qualifier name
my $OGMP_QUALS_USAGE={};
my $OGMP_QUALS_GENENAME={};
my $OGMP_QUALS_TO_NOTE={};


##############################
# Verification for required: #
#   File and software        #
##############################

# Files
my $HOME          = $ENV{"HOME"};
my @LIB_PATH      = (($HOME || "."));
push(@LIB_PATH,split(/:/,$ENV{"MF2SQN_LIB"})) if $ENV{"MF2SQN_LIB"};

my $GENE_NAME_FILE    = "";
my $QUALIF_FILE       = "";
my $SUB_INFO_MODEL    = "";
my $SUBMITTERDIR      = "";
foreach my $dir (@LIB_PATH) {
     next if !( -e "$dir/gene_names.lst" || -e "$dir/ogmp_qualifiers.lst" ||
                -e "$dir/mf2sqn_SubInfo" || -e "$dir/mf2sqn_Submitters");
     $GENE_NAME_FILE = "$dir/gene_names.lst"      if !$GENE_NAME_FILE && (-e "$dir/gene_names.lst");
     $QUALIF_FILE    = "$dir/ogmp_qualifiers.lst" if !$QUALIF_FILE    && (-e "$dir/ogmp_qualifiers.lst");
     $SUB_INFO_MODEL = "$dir/mf2sqn_SubInfo"      if !$SUB_INFO_MODEL && (-e "$dir/mf2sqn_SubInfo");
     $SUBMITTERDIR   = "$dir/mf2sqn_Submitters"   if !$SUBMITTERDIR   && (-e "$dir/mf2sqn_Submitters");
     last if $GENE_NAME_FILE && $QUALIF_FILE && $SUB_INFO_MODEL && $SUBMITTERDIR;
}

die "Doesn't find the 'gene name list' file 'gene_names.lst'. Check your installation for this one.\n"
    if !$GENE_NAME_FILE;

die "Doesn't find the 'ogmp qualifiers' file 'ogmp_qualifiers.lst'. Check your installation for this one.\n"
     if !$QUALIF_FILE;

die "Doesn't find the 'mf2sqn subinfo' file 'mf2sqn_SubInfo'. Check your installation for this one.\n"
     if !$SUB_INFO_MODEL;

die "Doesn't find the 'mf2sqn submitters' directory 'mf2sqn_Submitters'. Check your installation for this one.\n"
    if !$SUBMITTERDIR;

# Software
my $PATH = $ENV{"PATH"} || "";
my @PATH = split(/:/, $PATH);
my $TBL2ASN=&GetPath("tbl2asn");


# Basic command-line args
my $MASTERFILE="";          # The name of a file containing a genome in MF format
my $MF_NAME="";             # The name of masterfile
my $SUBINFOFILE="";         # The name of a file containing the submission information
my $AUTHOR="";              # Author name (Lang or Burger)
my $AUTHORFILE="";          # The name of a file containing autor information
my $CREATEGB=0;             # Use in order to create Genbank file
my $CREATETB=0;             # Use in order to create Table file


my $SUBINFO={};
my $ACCEPT_GENOME  = {   O  => 'apicoplast',
                         C  => 'chloroplast',
                         H  => 'chromoplast',
                         Y  => 'cyanelle',
                         E  => 'extrachromosomal',
                         G  => 'genomic',
                         K  => 'kinetoplast',
                         L  => 'leucoplast',
                         A  => 'macronuclear',
                         M  => 'mitochondrion',
                         N  => 'nucleomorph',
                         D  => 'plasmid',
                         P  => 'plastid',
                         R  => 'proplastid',
                         W  => 'proviral',
                         U  => 'unknown' };
my $ACCEPT_GCODE    = { '1' =>  'Standard',
                        '2' => 'Vertebrate Mitochondrial',
                        '3' => 'Yeast Mitochondrial',
                        '4' => 'Mold Mitochondrial',
                        '5' => 'Invertebrate Mitochondrial',
                        '6' => 'Ciliate Dasycladacean Hexamita Nuclear',
                        '9' => 'Echinoderm Flatworm Mitochondrial',
                       '10' => 'Euplotid Nuclear',
                       '11' => 'Bacterial and Plant Plastid',
                       '12' => 'Alternative Yeast Nuclear',
                       '13' => 'Ascidian Mitochondrial',
                       '14' => 'Alternative Flatworm Mitochondrial',
                       '15' => 'Blepharisma Macronuclear',
                       '16' => 'Chlorophycean Mitochondrial',
                       '21' => 'Trematode Mitochondrial',
                       '22' => 'Scenedesmus Obliquus Mitochondrial',
                       '23' => 'Thraustochytrium Mitochondrial'};
my $ACCEPT_MOLCLASS = { D  => 'DNA',
                        R  => 'RNA',
                        N  => 'NUCLEOTIDE',
                        P  => 'PROTEIN',
                        O  => 'OTHER'};
my $ACCEPT_MOLTYPE  = { G  => 'genomic',
                        E  => 'pre mrna',
                        M  => 'mrna',
                        R  => 'rrna',
                        T  => 'trna',
                        S  => 'snrna',
                        C  => 'scrna',
                        P  => 'peptide',
                        O  => 'OTHER GENETIC',
                        X  => 'GENOMIC MRNA'};
my $ACCEPT_TOPO     = { C  => 'CIRCULAR',
                        L  => 'LINEAR',
                        C  => 'CIRCULAR',
                        T  => 'TANDEM',
                        O  => 'OTHER'};
my $ACCEPT_COMPLETE = { C  => 'Complete',
                        P  => 'Partial',
                        NL => 'No Left',
                        NR => 'No Right',
                        NE => 'No Ends',
                        HL => 'Has Left',
                        HR => 'Has Right',
                        O  => 'Other'};
$SUBINFO = { 'GENOME'         => $ACCEPT_GENOME->{'M'},
             'NUC_CODE'       => '1',
             'MIT_CODE'       => '1',
             'MOLCLASS'       => $ACCEPT_MOLCLASS->{'D'},
             'MOLTYPE'        => $ACCEPT_MOLTYPE->{'G'},
             'TOPOLOGY'       => $ACCEPT_TOPO->{'C'},
             'COMPLETEDNESS'  => $ACCEPT_COMPLETE->{'C'}};
my $MODIFIERS       = { 'TITLE'         => '1',
                        'GENOME'        => 'location',
                        'MIT_CODE'      => 'mgcode',
                        'NUC_CODE'      => 'gcode',
                        'SCIENTIFIC'    => 'organism',
                        'STRAIN'        => '1',
                        'COMMENT'       => '1',
                        'MOLCLASS'      => 'mol',
                        'MOLTYPE'       => 'type',
                        'TOPOLOGY'      => 'topology',
                        'COMPLETEDNESS' => 'completedness',
                        'CITATION'      => '1'};
my $INFO = {};

# Advanced command-line args

#####################################
# Command-Line Arguments Processing #
#####################################

# Usage([$message])
# Prints the help text on STDERR, followed by the optional $message,
# then terminates the program with exit status 20.
sub Usage {
    my ($message) = @_;
    $message ||= "";

    my $help_text = <<USAGE;
Basic usage: $BASENAME [-d] [-t] -a author -m masterfile

where: -a author     :  All submitter file need to be under `MF2SQN_LIB/mf2sqn_Submitters` (with MF2SQN_LIB your environment variable)
                        and have following format name "SubmitterName.sbt" (need to respect case).
                        A template can be found in "SubmitterGeneric.sbt".
                        Each file contain a text ASN.1 Submit-block object.
                        The Submit-block contains contact information (to whom questions
                        on the submission can be addressed) and a submission citation (which lists
                        the authors who get scientific credit for the sequencing).

       -m masterfile : Is the masterfile for submission.

       -d            : Create GenBank file in output.
       -t            : Create Table file in output.
       -g            : gene_names_file.

USAGE
    print STDERR $help_text;

    if ($message) {
        print STDERR "\n$message\n";
    }
    exit 20;
}

# Without any argument, just show the help text.
&Usage() unless @ARGV;

# Consume the leading switches of @ARGV; parsing stops at the first
# argument that is not one of -@, -m, -a, -d, -t or -g.
while (@ARGV) {
    my ($opt,$arg) = ($ARGV[0] =~ /^-([\@madtg])(.*)$/o);
    last unless defined $opt;

    # -@, -m, -a and -g take a value, either glued to the switch ("-mfile")
    # or given as the next argument ("-m file").
    if ($arg eq "" && $opt =~ /[\@mag]/) {
        if (@ARGV < 2) {
            print STDERR "Argument required for switch \"-$opt\".\n";
            exit 1;
        }
        shift @ARGV;
        $arg=$ARGV[0];
    }

    if ($opt eq '@') {
        $DEBUG = (defined($arg) ? $arg : 1);
    }
    elsif ($opt eq 'm') {
        $MASTERFILE = $arg;
    }
    elsif ($opt eq 'a') {
        $AUTHOR = $arg;
    }
    elsif ($opt eq 'd') {
        $CREATEGB = 1;
    }
    elsif ($opt eq 't') {
        $CREATETB = 1;
    }
    elsif ($opt eq 'g') {
        $GENE_NAME_FILE = $arg;
    }

    shift @ARGV;
}

###########################################
# Validate remaining command-line options #
###########################################

# The masterfile must be an existing plain file.
if (! -f "$MASTERFILE") {
    &Usage( "Error: the masterfile '$MASTERFILE' supplied with -m doesn't exist\n" );
}
$MF_NAME = basename($MASTERFILE);
# Name used for the generated files: any ".fsa" inside the masterfile name
# is rewritten with the process id.
my $MF_NAME_S = $MF_NAME;
$MF_NAME_S =~ s/\.fsa/\.f_s_a_$$/g;

# The author is valid when a matching submitter file exists.
if (! -e "$SUBMITTERDIR/Submitter$AUTHOR.sbt") {
    &Usage( "Accepted value for author is Lang or Burger");
}
$AUTHORFILE= "$SUBMITTERDIR/Submitter$AUTHOR.sbt";

if (! -f "$AUTHORFILE") {
    die "The author file '$AUTHORFILE' doesn't exist\n";
}

################
# Trap Signals #
################

# SigCleanup($signal_name)   (private)
# Signal handler: aborts the program with a message naming the signal.
# Dying (instead of exiting silently) lets the END block do the cleanup.
sub SigCleanup {
    my ($signal_name) = @_;
    die "\nExiting: received signal \"" . $signal_name . "\".\n";
}

# Install the same handler for every trapped signal.
foreach my $signal_name ('INT', 'TERM', 'HUP', 'QUIT', 'PIPE') {
    $SIG{$signal_name} = \&SigCleanup;
}

###############################
#   M A I N   P R O G R A M   #
###############################

# CREATE THE TMPDIR #
# Create the private (mode 0700) work directory; it is removed by the
# END block unless debugging is enabled.
mkdir($TMPDIR,0700) or die "Error: can't create work directory '$TMPDIR': $!\n";

# Load the masterfile and get its list of contigs.
my $PIRMASTER    = PirObject::Masterfile->ObjectFromMasterfile($MASTERFILE,1);
my $CONTIGS      = $PIRMASTER->get_contigs();
my $NB_CONTIGS   = scalar(@$CONTIGS);

# $STEP numbers the progress messages printed before each stage.
my $STEP = 1;
my $to_print = ($NB_CONTIGS > 1 ? "Masterfile '$MASTERFILE' present $NB_CONTIGS contigs\n"
                       : "Masterfile '$MASTERFILE' present $NB_CONTIGS contig\n");
print $to_print;
# Stage 1: locate the local subinfo file (a template is copied, and the
# program exits, when there is none yet).
print "$STEP) Look for SubInfo file\n";
&LookForSubInfo();
$STEP++;
# Stage 2: read and validate the subinfo file.
print "$STEP) Parse information of SubInfo file\n";
&GetInfo();
$STEP++;
# Stage 3: write the fasta file in the work directory.
print "$STEP) Create fasta file for tbl2asn\n";
&MakeFastaFile();
$STEP++;
# Stage 4: load the gene name and qualifier lists, then write the
# feature table in the work directory.
print "$STEP) Create table file for tbl2asn\n";
&ParseGeneList();
&ParseQualifList();
&CreateTableFile();
$STEP++;
# Stage 5: run tbl2asn (RunTBL2ASN is defined elsewhere in this file).
print "$STEP) Run tbl2asn\n";
&RunTBL2ASN();

exit 0;

END {
    # Executed whenever the program terminates (normal exit or die).
    # Removes the work directory, unless debugging is enabled, in which
    # case the directory is kept and its name is reported.
    # Nothing is done when $TMPDIR is not set or is not located under /tmp/.
    if (defined($TMPDIR) && $TMPDIR =~ m#^/tmp/#) {
        if ($DEBUG) {
            print "Temporary work directory $TMPDIR NOT cleaned up ...\n";
        }
        else {
            rmtree($TMPDIR);
        }
    }
}

#############################
#   S U B R O U T I N E S   #
#############################

# LookForSubInfo()
# Looks in the current directory for a readable file "<masterfile>.subInfo".
# When it exists, its name is stored in $SUBINFOFILE.
# Otherwise the template $SUB_INFO_MODEL is copied under that name, the
# user is told to review it, and the program exits with status 1.
sub LookForSubInfo {
    my $subInfoName = "$MF_NAME.subInfo";
    if (! (-r ($subInfoName))){
       copy($SUB_INFO_MODEL, $subInfoName)  or die "File '$SUB_INFO_MODEL' cannot be copied in '$subInfoName'.\n";
       print "\nYou don't have local copy of subInfo file for '$MF_NAME'.\n"
            ."A local copy was made in '$subInfoName'.\n"
            ."Perhaps you need to make some modification.\n"
            ."After modification just re-run mf2sqn.\n\n";
       exit 1;
    }
    $SUBINFOFILE = $subInfoName;
}

# Get input info

# GetInfo()
# Reads the subinfo file $SUBINFOFILE and fills the global $INFO with
# one entry per record:  tag => { field => value, ... }.
# The file is not evaluated as Perl code; it is parsed textually:
#   - lines starting with '#', the "%SUBINFO = (" line and any line
#     containing ");" are ignored;
#   - the remaining lines are concatenated and cut after every '}';
#   - inside the braces, 'field' => 'value' pairs must be separated by
#     whitespace only.
# Dies when the file cannot be opened or when a pair cannot be parsed.
# Finally calls CheckInfoEntry to validate the values.
sub GetInfo {
    my $INFO_F = IO::File->new($SUBINFOFILE, "<")
        or die "Can not open the subinfo file '$SUBINFOFILE': $!\n";

    my $concat = "";
    while (my $line = <$INFO_F>) {
        next if $line =~ m/^#/;
        next if $line =~ m/^\%SUBINFO\s*=\s*\(/;
        chomp($line);
        $concat .= $line unless $line =~ m/\);/;
    }
    # Everything is in memory now; do not keep the handle open while
    # the validation below may die.
    $INFO_F->close();

    # One record per line: cut right after every closing brace.
    $concat =~ s/\}/\}\n/g;

    foreach my $line_hash (split(/\n/, $concat)) {
        # List assignment yields undef when there is no match. This avoids
        # "my $x = $1 if ...", whose behaviour is undefined (see perlsyn).
        my ($key)      = ($line_hash =~ m/^\s*\'(.+)\'\s*=>\s*\{/);
        my ($st_value) = ($line_hash =~ m/\{(.+)\}/);

        # Leftover text (e.g. after the last record) is not a record.
        next if !defined($key) && !defined($st_value);
        $key      = "" if !defined($key);
        $st_value = "" if !defined($st_value);

        # One 'field' => 'value' pair per line.
        $st_value =~ s/\'\s+\'/\'\n\'/g;

        my $sub_hash = {};
        foreach my $line_sub (split(/\n/, $st_value)) {
            my ($sub_key,$sub_value) = ($line_sub =~ m/\s*\'(.+)\'\s*=>\s*\'(.*)\'/)
                or die "Can't parse line '$line_sub' in '$SUBINFOFILE'\n";
            $sub_hash->{$sub_key} = $sub_value;
        }
        $INFO->{$key} = $sub_hash;
    }

    &CheckInfoEntry();
}

# CheckInfoEntry()
# Walks through every field of every record of $INFO and copies the
# values into $SUBINFO:
#   - fields with a restricted set of values are handed to
#     CheckFieldAndComp, which dies on an unknown value;
#   - scientific, strain, title and comment are stored unchanged;
#   - citation must be a comma-separated list made of digits only.
# Field names are compared case-insensitively.
sub CheckInfoEntry {
    # Field name (lowercase) and the table of its accepted values.
    my @controlled = (
        [ "genome",        $ACCEPT_GENOME   ],
        [ "nuc_code",      $ACCEPT_GCODE    ],
        [ "mit_code",      $ACCEPT_GCODE    ],
        [ "molclass",      $ACCEPT_MOLCLASS ],
        [ "moltype",       $ACCEPT_MOLTYPE  ],
        [ "topology",      $ACCEPT_TOPO     ],
        [ "completedness", $ACCEPT_COMPLETE ],
    );
    my %is_free_text = map { $_ => 1 } ("scientific", "strain", "title", "comment");

    foreach my $hash (values %$INFO) {
        foreach my $field (keys %$hash) {
            my $value = $hash->{$field};

            foreach my $rule (@controlled) {
                &CheckFieldAndComp($field, $rule->[0], $rule->[1], $value);
            }

            my $lc_field = lc($field);
            if ($is_free_text{$lc_field}) {
                $SUBINFO->{uc($lc_field)} = $value;
            }
            elsif ($lc_field eq "citation") {
                foreach my $id (split(",",$value)) {
                    if ($id =~ m/\D+/) {
                        die "Accepted value for field 'CITATION' in $SUBINFOFILE is only number\n";
                    }
                }
                $SUBINFO->{'CITATION'} = $value;
            }
        }
    }
}

# CheckFieldAndComp($field, $field_to_check, $accept_values, $value)
#   $field          : field name read from the subinfo file (any case)
#   $field_to_check : lowercase name of the field this call is about
#   $accept_values  : hash ref, accepted abbreviation => full value
#   $value          : value read from the subinfo file
# Does nothing unless $field is (case-insensitively) $field_to_check.
# Otherwise dies when $value is not a key of $accept_values, and else
# stores the value in $SUBINFO: the genetic codes are stored as given,
# the other fields are stored as the full value of their abbreviation.
sub CheckFieldAndComp {
    my ($field,$field_to_check,$accept_values,$value) = @_;

    my $lc_field = lc($field);
    return if $lc_field ne $field_to_check;

    if (!($accept_values->{$value})){
        my $uc_field = uc($field);
        # Sort the abbreviations (numerically for the genetic codes) so
        # that the message does not depend on the hash order.
        my @abbrevs = sort { ($a =~ m/^\d+$/ && $b =~ m/^\d+$/) ? $a <=> $b : $a cmp $b }
                      keys %$accept_values;
        my $list = @abbrevs ? join(",", @abbrevs) . "." : "";

        die "Accepted value for field '$uc_field' in $SUBINFOFILE is : $list\n";
    }

    $SUBINFO->{'GENOME'}        = $ACCEPT_GENOME->{$value}   if $lc_field eq "genome";
    $SUBINFO->{'NUC_CODE'}      = $value                     if $lc_field eq "nuc_code";
    $SUBINFO->{'MIT_CODE'}      = $value                     if $lc_field eq "mit_code";
    $SUBINFO->{'MOLCLASS'}      = $ACCEPT_MOLCLASS->{$value} if $lc_field eq "molclass";
    $SUBINFO->{'MOLTYPE'}       = $ACCEPT_MOLTYPE->{$value}  if $lc_field eq "moltype";
    $SUBINFO->{'TOPOLOGY'}      = $ACCEPT_TOPO->{$value}     if $lc_field eq "topology";
    $SUBINFO->{'COMPLETEDNESS'} = $ACCEPT_COMPLETE->{$value} if $lc_field eq "completedness";
    return;
}


# CreateFastaFile #

# MakeFastaFile()
# Writes "$TMPDIR/$MF_NAME_S.fsa": for every contig of @$CONTIGS, a header
# built by CreateHeader followed by the sequence in lowercase, without
# '!' characters, cut in lines of 80 characters, then an empty line.
# Dies when a sequence contains a character outside the accepted
# nucleotide codes or when the file cannot be written.
sub MakeFastaFile {
    my $count      = 0;
    my $format_seq = "";
    my $file_name  = "$MF_NAME_S.fsa";
    foreach my $contig (@$CONTIGS) {
        my $seq  = lc($contig->get_sequence());
           $seq  =~ s/!//g;
        my $name = $contig->get_name();
        die "Sequence of contig '$name' contain bad caracters"
            if $seq =~ m/[^acgturykmswbdhvnx]/; # Nucleotide codes (IUPAC)
        $format_seq .= &CreateHeader($count);
        # Strict '<': with '<=' a sequence whose length is a multiple of 80
        # got one extra empty line. substr() returns the shorter remainder
        # by itself for the last line.
        for (my $i = 0 ; $i < length($seq); $i += 80 ) {
            $format_seq .= substr($seq, $i, 80) . "\n";
        }
        $format_seq .= "\n";
        $count++;
    }
    my $fasta_file = "$TMPDIR/$file_name";
    my $FF = IO::File->new($fasta_file, ">") or die "Cannot open : $fasta_file\n";
    print $FF "$format_seq" or die "Cannot write : $fasta_file: $!\n";
    # Buffered write errors (e.g. disk full) only show up at close time.
    $FF->close()            or die "Cannot write : $fasta_file: $!\n";
}

# CreateHeader($ct)
#   $ct : index of the contig, used to build the sequence id "C_$ct".
# Returns the fasta header line (with its newline):
#   >C_<ct> [modifier=value]... <title>
# One [modifier=value] pair is written for every field present in $INFO
# whose entry in $MODIFIERS is a modifier name; fields marked '1' in
# $MODIFIERS are not written. Values are taken from $SUBINFO.
# Field names are looked up in uppercase, which matches the
# case-insensitive handling done by CheckInfoEntry.
# Dies when a field is unknown or when a written field has no value.
sub CreateHeader {
    my $ct = shift;

    my $format_header  = ">C_$ct ";
    my $modifiers_list = "";
    # Sorted so that the header does not depend on the hash order.
    foreach my $tag (sort keys %$INFO) {
        my $hash = $INFO->{$tag};
        foreach my $field (sort keys %$hash) {
            my $uc_field = uc($field);
            my $modifier = $MODIFIERS->{$uc_field};
            # Name the offending field: the modifier is undefined here.
            die "Field '$field' is not accepted in fasta header\n"
                if !$modifier;
            next if $modifier eq '1';
            my $value = $SUBINFO->{$uc_field};
            die "Field '$field' must have a value in subinfo file '$SUBINFOFILE'\n"
                if !$value;
            next if $modifier eq "strand";
            $modifiers_list .= "[$modifier=$value]";
        }
    }
    $format_header .= "$modifiers_list ";
    # The title is optional.
    my $title = defined($SUBINFO->{'TITLE'}) ? $SUBINFO->{'TITLE'} : "";
    $format_header .= "$title\n";
    return $format_header;

}

# ParseGeneList #

# ParseGeneList()
# Reads the gene name list $GENE_NAME_FILE and fills the global $GENE_NAME.
# Each useful line has six '|'-separated fields:
#   names | type | product | function | complex | EC number
# where "names" is a '='-separated list of synonyms (the first one is
# the preferred name) and a lone '-' stands for an empty field.
# Every synonym, in lowercase, becomes a key of $GENE_NAME.
# Comment lines and empty lines are ignored; a line without exactly six
# fields is reported on STDOUT and skipped.
# Dies when the file cannot be read.
sub ParseGeneList {

    my $GF = IO::File->new($GENE_NAME_FILE, "<")
        or die "Can't read file \"$GENE_NAME_FILE\": $!\n";
    my @G_FILE = grep { !/^\s*#|^\s*$/ } <$GF>;
    $GF->close();

    foreach my $line (@G_FILE) {
        chomp($line);
        my @fields=split(/\|/,$line);
        if (@fields != 6) {
            print "Error in $GENE_NAME_FILE: six fields needed at\n-> $line\n";
            next;
        }
        foreach my $field (@fields) {
            $field =~ s/^\s+//;   # Remove leading spaces
            $field =~ s/\s+$//;   # Remove trailing spaces
            $field =~ s/^-$//;    # Replace dashes by empty strings.
        }
        my $names        = $fields[0];
        my $genetype     = $fields[1] || "Unknown";
        my $geneproduct  = $fields[2] || "";
        my $genefunction = $fields[3] || "";
        my $genecomplex  = $fields[4] || "";
        my $geneecnumber = $fields[5] || "";

        # Amino acid names are rewritten; this does not depend on the
        # synonym, so it is done once per line.
        $geneproduct = "Aspartate" if lc($geneproduct) =~ m/aspartic acid/;
        $geneproduct = "Glutamate" if lc($geneproduct) =~ m/glutamic acid/;

        my @names=split(/=/,$names);
        my $prefered=$names[0];
        foreach my $name (@names) {
            my $lname="\L$name";
            $GENE_NAME->{$lname} = { 'NAMES'     => $prefered,
                                     'TYPES'     => $genetype,
                                     'PRODUCTS'  => $geneproduct,
                                     'FUNCTIONS' => $genefunction,
                                     'COMPLEXES' => $genecomplex,
                                     'ECNUMBERS' => $geneecnumber};
        }
    }
}

# ParseQualifList()
# Reads the qualifier list $QUALIF_FILE and fills the global hashes
# $OGMP_QUALS_CLASSES, $OGMP_QUALS_TEXTS, $OGMP_QUALS_USAGE and
# $OGMP_QUALS_GENENAME, all keyed by qualifier name.
# Each useful line has up to five fields separated by '|' followed by
# whitespace: qualifier | classes | text | usage | name.
# Comment lines and empty lines are ignored.
# Also registers in $OGMP_QUALS_TO_NOTE a fixed list of qualifier names.
# Dies when the file cannot be read or when a line has no qualifier.
sub ParseQualifList {
    my $FH = IO::File->new($QUALIF_FILE, "<")
        or die "Package Gene_Name: Can't read file \"$QUALIF_FILE\": $!\n";
    my @FILE = grep { !/^\s*#|^\s*$/ } <$FH>;
    $FH->close();

    foreach my $line (@FILE) {
        my @fields = split(/\|\s+/,$line);
        foreach my $field (@fields){
            $field =~ s/^\s+//;
            $field =~ s/\s+$//;
        }
        my ($qualifier,$classes,$text,$usage,$name) = @fields[0..4];
        die "Package OGMP_Qualifs: can't parse line:\n$line" unless $qualifier;
        $OGMP_QUALS_CLASSES->{$qualifier}=$classes;
        $OGMP_QUALS_TEXTS->{$qualifier}=$text if $text;
        $OGMP_QUALS_USAGE->{$qualifier}=$usage;
        $OGMP_QUALS_GENENAME->{$qualifier}=$name;
    }

    foreach my $to_note ("substitution", "polymorph", "non-silent",
                         "deletion", "repeat_element", "codon_altern",
                         "translate_altern", "organization") {
        $OGMP_QUALS_TO_NOTE->{$to_note}++;
    }
}


# CreateTableFile #

# CreateTableFile()
# Writes the feature table "$TMPDIR/$MF_NAME_S.tbl".
# For every contig of @$CONTIGS:
#   - a ">Feature C_<n> Table1" line;
#   - one REFERENCE feature covering the whole contig for every PubMed
#     id of the CITATION field of $SUBINFO;
#   - the features of every annotation of type "G" or "S", built by
#     AddInfoForGene from the annotations overlapping it.
# Annotations of type "C" are ignored. Annotations whose gene name ends
# in "byp<digits>" are skipped here, and those whose name contains
# "citation" are handed to AddCitationInfo instead.
# AddCitationInfo and WhatOverlapsThis are defined elsewhere in this file.
# Dies when the file cannot be written.
sub CreateTableFile {
    my $count = 0;
    my $file_name   = "$MF_NAME_S.tbl";
    my $feature_tab = "";

    # The CITATION field is optional in the subinfo file.
    my $PubMedId    = $SUBINFO->{'CITATION'};
    my @PubMedId    = defined($PubMedId) ? split(",",$PubMedId) : ();

    foreach my $contig (@$CONTIGS) {
        my $annotations   = $contig->get_annotations();
        my $seq_length    = $contig->get_sequencelength();
        my $Already_annot = {}; # Hash use for trans-spliced gene.
        my $annots_no_comment = [];
        $feature_tab .= ">Feature C_$count Table1\n";
        foreach my $id (@PubMedId) {
            $feature_tab .= "1\t$seq_length\tREFERENCE\n";
            $feature_tab .= "\t\t\tPubMed\t$id\n";
        }

        # $cg_orfs: number of annotations per orf name in this contig.
        # $ct_orfs: running number given to the orfs present more than once.
        my ($cg_orfs,$ct_orfs)   = ({},{});

        foreach my $annot (@$annotations) {
            my $type = $annot->get_type();
            my $name = $annot->get_genename();
            next if $type eq "C";
            $cg_orfs->{$name}++ if defined($name) && $name =~ m/orf\d+/;
            push(@$annots_no_comment,$annot);
        }

        my $AP_key = {};
        foreach my $annot (@$annots_no_comment) {
            my $a_type = $annot->get_type();
            next if ($a_type ne "G" && $a_type ne "S");

            # The address found in the stringified reference identifies the
            # annotation object; each object is handled only once.
            # (List assignment instead of "my $x = $1 if ...", whose
            # behaviour is undefined, see perlsyn.)
            my ($id_AP) = ("$annot" =~ m/0x(.+)\)/);
            $id_AP = "" if !defined($id_AP);
            $AP_key->{$id_AP} += 1;
            next if $AP_key->{$id_AP} != 1;

            my $name = $annot->get_genename();
               $name = $1 if $name =~ m#(.+)_[a-z]$#;  # Drop the "_x" suffix
            next if $name =~ m/byp\d+$/;
            if ($name =~ m/citation/i) {
                $feature_tab = &AddCitationInfo($annot,$feature_tab);
                next;
            }
            next if $Already_annot->{$name};
            my $start = $annot->get_startpos();
            my $end   = $annot->get_endpos();

            # Annotation of gene
            $ct_orfs->{$name}++ if $cg_orfs->{$name} && $cg_orfs->{$name} > 1;
            my $annot_who_overlap = &WhatOverlapsThis($start,$end,$contig);
            my $orfnum = $ct_orfs->{$name};
            ($feature_tab,$Already_annot) = &AddInfoForGene($annot_who_overlap,$feature_tab,$name,$orfnum,$contig,$Already_annot,$seq_length);
        }
        $feature_tab .= "\n";
        $count++;
    }
    my $table_file = "$TMPDIR/$file_name";
    my $TF = IO::File->new($table_file, ">") or die "Cannot open : $table_file\n";
    print $TF "$feature_tab" or die "Cannot write : $table_file: $!\n";
    # Buffered write errors (e.g. disk full) only show up at close time.
    $TF->close()             or die "Cannot write : $table_file: $!\n";
}

sub AddInfoForGene {
    my ($annot_who_overlap,$feature_tab,$name,$orfnum,$contig,$Already_annot,$seq_length) = @_;

    # Separate X element
    my ($annot_who_overlap_w_byp,$annot_byp) = ((),());
    foreach my $annotation_who_overlap (@$annot_who_overlap){
      if (scalar(@$annotation_who_overlap) != 1) {
        push(@$annot_who_overlap_w_byp,$annotation_who_overlap);
        next;
      }
      foreach my $feature_who_overlap (@$annotation_who_overlap) {
          my $annot  = $feature_who_overlap->[2];
          my $a_name = $annot->get_genename();
          ($a_name =~ m/byp\d+$/) ? push(@$annot_byp,$annot) : push(@$annot_who_overlap_w_byp,$annotation_who_overlap);
      }
    }

    my $AP_key = {};
    my $X_key  = {};
    foreach my $annotation_who_overlap (@$annot_who_overlap_w_byp){
        my ($CDS_info,$toAdd,$EI_info,$CIT_info,$id) = ("","","","","");
        my ($exons,$introns,$gene,$citation) = ({},{},{},{});
        my ($g_noStart,$g_noStop,$g_genequalifs,$g_CDSqualifs,$g_isTransSplice,$g_type) = ("","","","","","");

        foreach my $feature_who_overlap (@$annotation_who_overlap) {
            my $annot  = $feature_who_overlap->[2];
            my $a_suffix = "";
            my $a_name = $annot->get_genename();
            ($a_name,$a_suffix) = ($1,"_$2") if $a_name =~ m#(.+)_([a-z])#;
            my $id_AP  = $1 if scalar($annot) =~ m/0x(.+)\)/;
            $AP_key->{$id_AP} += 1;
            my $already_treat = $AP_key->{$id_AP} != 1 ? 1 : 0;
            last if $already_treat == 1 ;

            next if $a_name  =~ m/citation/i;
            my $a_type    = $annot->get_type();
            my $start     = $annot->get_startpos();
            my $end       = $annot->get_endpos();
            my $startline = $annot->get_startline();
            my $isMinus   = ( $annot->get_direction eq "==>" ? 0 : 1);
            if ($annot_byp) {
              @$annot_byp  = $isMinus ? sort { $b->get_startpos() <=> $a->get_startpos() } @$annot_byp
                     : sort { $a->get_startpos() <=> $b->get_startpos() } @$annot_byp;
            }

               $id        =" G-$name" if $a_type eq "G";
            my $add       = $orfnum ? "_$orfnum" : "";
               $add       = !$add && $startline =~ m/G-\S+(_\d+)/ ? $1 : $add;
            my $ac        = $startline =~ m/G-trn.\((.+)\)/ ? lc($1) : "" ;
               $ac        =~ tr/u/t/;
               $ac        = "\($ac\)" if $ac;

            my $lname     = lc($name);
            my $product   = $GENE_NAME->{$lname}->{"PRODUCTS"}  || "";
               $product   = "tRNA X" if $lname =~ m/trnx/;
            my $function  = $GENE_NAME->{$lname}->{"FUNCTIONS"}  || "";
            my $EC_number = $GENE_NAME->{$lname}->{"ECNUMBERS"} || "";
            my $type      = $GENE_NAME->{$lname}->{"TYPES"}     || "";
               $type      = $type eq "protein" ? "CDS" : $type;
           ($product,$type) = ("hypothetical protein","CDS") if $name =~ m/orf/;

            next if $name ne $a_name;

            my ($gene_qualifs,$CDS_qualifs,$none_qualifs,
                $noStart,$noStop,
                $isTransSplice,$haveRef,$isPseudo,$FKey,
                $CDS_notes,$gene_notes,$none_notes)
                = &TreatQualifiers($annot);
            $noStart = "" if !$noStart;
            $noStop  = "" if !$noStop;

            my $all_notes = [@$CDS_notes,@$gene_notes,@$none_notes];
            # Add information about Sig
            if ($a_type eq "S") {
                $feature_tab = &TreatMotAndSig($feature_tab,$noStart,$start,$g_noStop,$end,$a_name,$none_qualifs,$FKey,$add,$startline,$a_type,$all_notes);
            }
            elsif ($a_type eq "G") {
                  ($g_noStart,$g_noStop,$g_genequalifs,$g_CDSqualifs,$g_isTransSplice,$g_type)
                = ($noStart  ,$noStop  ,$gene_qualifs ,$CDS_qualifs ,$isTransSplice  ,$type);

                # Add information about Var
                if ($a_name =~ m/^var/i) {
                    $feature_tab = &TreatVar($feature_tab,$noStart,$start,$g_noStop,$end,$a_name,$none_qualifs,$all_notes);
                    next;
                }
                # Add information about Mot
                elsif ($a_name =~ m/^mot/i) {
                    $feature_tab = &TreatMotAndSig($feature_tab,$noStart,$start,$g_noStop,$end,$a_name,$none_qualifs,$FKey,$add,$startline,$a_type,$all_notes);
                    next;
                }

                # Add gene with introns
                elsif ($isTransSplice) {
                    $Already_annot->{$name}++;
                    ($exons,$introns,$gene,$citation,$CDS_qualifs) = &TreatTransSplice($contig,$name,$annotation_who_overlap);
                    $feature_tab = &AddGeneInfo($gene,$name,$add,$ac,$feature_tab,$g_noStart,$g_noStop,$gene_notes); # A verifier
                }

                # Add gene if without transplice just gene information
                else {
                    my $type = $name =~ m/RNA/ ? "misc_RNA" : "gene";
                    $feature_tab .= $noStart.$start."\t".$g_noStop.$end."\t$type\n";
                    $feature_tab .= "\t\t\tgene\t$name$a_suffix$ac\n";
                    my $frag_name = $a_suffix;
                       $frag_name =~ s/_//;
                    $feature_tab .= $gene_qualifs if $gene_qualifs;
                    $feature_tab .= $CDS_qualifs  if $CDS_qualifs && $isPseudo;

                    push(@$gene_notes,"fragment $frag_name") if $frag_name;
                    my $cp_num = $add;
                       $cp_num =~ s/_//;
                    push(@$gene_notes,"copy $cp_num") if $cp_num;

                    my $fusion_note = join("; ",@$gene_notes);
                    $feature_tab .= "\t\t\tnote\t$fusion_note\n" if $fusion_note;

                    $CIT_info .= "$start\t$end\tREFERENCE\n" if $haveRef;
                    $CIT_info .= "\t\t\tPubMed\t$haveRef\n"  if $haveRef;
                }

                # CDS info
                next if $isPseudo == 1;
                my $anticodon = &LocateAc($annot,$contig) if $ac;
                $toAdd       .= "\t\t\tproduct\t$product\n\t\t\tprotein_id\tlcl|${id}$add\n";
                $toAdd       .= "\t\t\tEC_number\t$EC_number\n" if $EC_number;
                $toAdd       .= $anticodon                      if $anticodon;

                next if (scalar(@$annotation_who_overlap) != 1);
                # last argument is num_exon in case of gene without exon/intron value is set to 0.
                $CDS_info = &InsertCoordinateOfBypElement($annot_byp,$annot->get_endpos(),$start,$noStart,$end,$g_noStop,$type,$isMinus,$CDS_info,0);

                my $fusion_note = join("; ",@$CDS_notes,@$none_notes);
                $CDS_info .= $toAdd;
                $CDS_info .= "\t\t\tnote\t$fusion_note\n" if $fusion_note;
                $CDS_info .= $CDS_qualifs if $CDS_qualifs;
            }
            # Add information about exon or intron
            next if ($a_type ne "E" && $a_type ne "I");
            next if $g_isTransSplice;

            my $fusion_note  = join("; ",@$CDS_notes) || undef;
               $CDS_qualifs .= "\t\t\tnote\t$fusion_note\n" if $fusion_note;

            # Add information about exon
            if ($a_type eq "E") {
                next unless $startline =~ m#G-(\S+)-E(\d+)#;
                my $exnum = $2;
                $exons->{$exnum} = { 'start' => $start  , 'end'  => $end,
                                     'type'  => $type,    'qualifs' => $CDS_qualifs, 'direction' => $annot->get_direction()  };
                next if !$haveRef;
                $CIT_info .= "$start\t$end\tREFERENCE\n";
                $CIT_info .= "\t\t\tPubMed\t$haveRef\n";
            }
            # Add information about intron
            else {
                next unless $startline =~ m#G-(\S+)-I(\d+)#;
                my $innum = $2;
                $introns->{$innum} = { 'start' => $start  , 'end'  => $end,
                                       'qualifs' => $CDS_qualifs};
                next if !$haveRef;
                $CIT_info .= "$start\t$end\tREFERENCE\n";
                $CIT_info .= "\t\t\tPubMed\t$haveRef\n";
            }
        }

        my $X_info = "";
        foreach my $byp_elem (@$annot_byp) {
          my $id_X  = $1 if scalar($byp_elem) =~ m/0x(.+)\)/;
          $X_key->{$id_X} += 1;
          my $already_treat = $X_key->{$id_X} != 1 ? 1 : 0;
          last if $already_treat == 1 ;
          my $start = $byp_elem->get_startpos();
          my $end   = $byp_elem->get_endpos();
          my $name  = $byp_elem->get_genename();
             $name  = $1 if $name =~ m#(\S+)-X(\d+)#;
             $X_info  .= $start."\t".$end."\tmisc_feature\n";
             $X_info  .= "\t\tgene\t$name\n";
             $X_info  .= "\t\tnote\tnon\-coding translational bypass region in mRNA due to secondary structure formation\n";
        }

        # Added information about CDS, exons/introns and Citation
        ($CDS_info,$EI_info) = &AddExonsInfo($exons,$CDS_info,$EI_info,$g_noStart,$g_noStop,$g_CDSqualifs,$toAdd,$g_type,$annot_byp,$seq_length);
        ($CDS_info,$EI_info) = &AddIntronsInfo($introns,$CDS_info,$EI_info)               if !$g_isTransSplice;
        ($CDS_info,$EI_info) = &AddIntronsInfoForTranspliced($introns,$CDS_info,$EI_info) if $g_isTransSplice;
        $CIT_info            = &AddCitationsInfo($citation) if scalar(keys %$citation);

        $feature_tab .= $CDS_info;
        $feature_tab .= $EI_info;
        $feature_tab .= $X_info;
        $feature_tab .= $CIT_info;
    }
    return ($feature_tab,$Already_annot);
}

# Append the coordinate lines of a CDS (or of one exon of a CDS) to $CDS_info,
# splitting the interval around every translational bypass element.
#
# Arguments:
#   $annot_byp    - array ref of bypass annotations (objects answering
#                   get_startpos() and get_endpos()); may be undef or empty
#   $annot_endpos - end coordinate of the segment following the last bypass
#   $start, $end  - interval written as-is when there is no bypass element
#   $noStart      - prefix put in front of every start coordinate
#   $g_noStop     - prefix put in front of every end coordinate
#   $type         - feature key written on the first coordinate line
#   $isMinus      - true when the feature lies on the minus strand
#   $CDS_info     - text accumulated so far
#   $num_exon     - exon number; any value greater than 1 blanks the key
#
# Returns $CDS_info extended with the new coordinate lines.
sub InsertCoordinateOfBypElement {
    my ($annot_byp,$annot_endpos,$start,$noStart,$end,$g_noStop,$type,$isMinus,$CDS_info,$num_exon) = @_;

    my @byp_elems = $annot_byp ? @$annot_byp : ();

    # Nothing to cut out: a single line carrying the feature key.
    return $CDS_info.$noStart.$start."\t".$g_noStop.$end."\t$type\n"
        unless @byp_elems;

    # Distance of one base in the reading direction of the feature.
    my $step = $isMinus ? -1 : 1;

    foreach my $idx (0 .. $#byp_elems) {
        # Every segment except the first begins right after the previous bypass.
        $start = $byp_elems[$idx-1]->get_endpos() + $step if $idx > 0;
        # Every segment stops right before the current bypass.
        $end   = $byp_elems[$idx]->get_startpos() - $step;

        # Only the very first line of the feature keeps the key.
        $type  = "" if $idx > 0 || $num_exon > 1;
        $CDS_info .= join("\t",$noStart.$start,$g_noStop.$end,$type)."\n";
    }

    # Remaining segment, from the last bypass to the end of the annotation.
    $start = $byp_elems[-1]->get_endpos() + $step;
    return $CDS_info.$noStart.$start."\t".$g_noStop.$annot_endpos."\n";
}

# Parse the qualifiers written on the start line of an annotation (including
# its continuation comment lines) and translate them for the feature table.
#
# Arguments:
#   $annot      - annotation object (get_startline, get_genename,
#                 get_startmulticomment, get_direction, get_startpos,
#                 get_type are called on it)
#   $isCitation - when true, only the parsed qualifiers are wanted
#
# Returns:
#   - a reference to the hash filled by ParseQualifiers when $isCitation is
#     true;
#   - otherwise the 12-element list
#     ($gene_toAdd,$CDS_toAdd,$none_toAdd,$noStart,$noStop,$isTransSplice,
#      $haveRef,$isPseudo,$FKey,$CDS_notes,$gene_notes,$none_notes)
#     computed by TreatNote, with $isPseudo defaulting to 0 and $FKey to "".
#
# A qualifier string rejected by ParseQualifiers only produces a warning on
# STDOUT.
sub TreatQualifiers {
    my ($annot,$isCitation) = @_;

    my $startline    = $annot->get_startline();
    my $name         = $annot->get_genename();
    my $multicomment = $annot->get_startmulticomment();
    my $isMinus      = ($annot->get_direction eq "==>" ? 0 : 1);
    my $start        = $annot->get_startpos();
    my $type         = $annot->get_type();

    # Glue the continuation lines to the start line.  Work on a copy of each
    # comment: the loop variable aliases the elements of the annotation's own
    # list, which must not be rewritten as a side effect of parsing.
    foreach my $comment (@$multicomment) {
        my $continuation = $comment;
        $startline    =~ s/\\\s*$//;     # drop the trailing continuation mark
        $continuation =~ s/^;;\s*/ /;    # drop the leading comment mark
        $startline   .= $continuation;
    }

    # Keep only what follows "<direction> start|point", without any trailing
    # ";;" comment.
    my $string_for_qualif = $startline;
       $string_for_qualif =~ s/;;.+$//;
       $string_for_qualif = $string_for_qualif =~ m/(<==|==>)\s*(start|point)(.+)/ ? $3 : "";

    # If qualifier invalid, it is ignored
    my %qualifs  = ();
    my $err_mess = "";
    if( $err_mess = &ParseQualifiers( $name, $string_for_qualif, \%qualifs ) ){
        chomp($err_mess);
        print "Warning: $err_mess This qualifier is ignored.\n";
    }

    return \%qualifs if $isCitation;

    my ($gene_toAdd,$CDS_toAdd,$none_toAdd,$noStart,$noStop,$isTransSplice,$haveRef,$isPseudo,$FKey,$CDS_notes,$gene_notes,$none_notes)
        = &TreatNote(\%qualifs,$isMinus,$start,$type,$name,$startline);

    return ($gene_toAdd,$CDS_toAdd,$none_toAdd,$noStart,$noStop,$isTransSplice,$haveRef,$isPseudo || 0,$FKey || "",$CDS_notes,$gene_notes,$none_notes);
}

# Translate the parsed qualifiers of one annotation into feature-table text.
#
# Arguments:
#   $qualifs   - hash ref qualifier name => value
#   $isMinus   - true when the annotation is on the minus strand
#   $aa_start  - start position of the annotation (first base of the first
#                codon, used for the 'first_aa' qualifier)
#   $type      - annotation type; "I" and "E" get a special treatment
#   $name      - gene name of the annotation
#   $startline - start line of the annotation
#
# Each qualifier is looked up in the $OGMP_QUALS_CLASSES, $OGMP_QUALS_USAGE,
# $OGMP_QUALS_GENENAME and $OGMP_QUALS_TEXTS tables.  A qualifier that is not
# allowed for this annotation is reported on STDOUT and skipped.
#
# Returns the list
#   ($gene_toAdd,$CDS_toAdd,$none_toAdd,$noStart,$noStop,$isTransSplice,
#    $haveRef,$isPseudo,$Fkey,$CDS_notes,$gene_notes,$none_notes)
# where the *_toAdd values are ready-made qualifier lines and the *_notes
# values are array refs of note texts.
sub TreatNote {
    my ($qualifs,$isMinus,$aa_start,$type,$name,$startline) = @_;

    my ($CDS_toAdd,$gene_toAdd,$none_toAdd,$Fkey) = ("","","","");
    my ($CDS_notes,$gene_notes,$none_notes)       = ([],[],[]);
    my ($noStart,$noStop,$isTransSplice,$haveRef) = ("","","","");
    my $aa_end     = $isMinus ? $aa_start - 2 : $aa_start + 2;
    my $isIntronic = ($startline =~ m/G-\w+-I\d+-\w+/) ? 1 : 0;
    my $isPseudo   = 0;

    # Sorted keys: the order of the generated lines and notes no longer
    # depends on the internal ordering of the hash.
    foreach my $key (sort keys %$qualifs) {
        my $qualif   = $key;
        my $value    = $qualifs->{$key};
        my $class    = $OGMP_QUALS_CLASSES->{$qualif};
        my $usage    = $OGMP_QUALS_USAGE->{$qualif};
        my $aut_name = lc($OGMP_QUALS_GENENAME->{$qualif});

        # The flags are plain per-iteration lexicals: declaring them with
        # "my ... if" has undefined behaviour and could let a value survive
        # into the next qualifier.
        my $forGene  = $usage eq "gene" ? 1 : 0;
        my $forCDS   = ($usage eq "CDS" || $usage eq "all"
                        || $usage eq "intron") ? 1 : 0; # intron: special case
        my $forNone  = $usage eq "none" ? 1 : 0;

        # Validation of qualifiers
        if ($usage ne "none") {
            if ($type eq "I") {
                if ($usage ne "intron" && $usage ne "all") {
                    print "Qualifiers : '$qualif' isn't accepted for Intron\n";
                    next;
                }
            }
            else {
                # $aut_name is lower case at this point, so it has to be
                # compared with (and replaced by) lower case text.
                if ($aut_name =~ m/intronic orf/) {
                    $aut_name = "orf";
                    if (!$isIntronic) {
                        print "Qualifier : '$qualif' is only acepted for intronic gene\n";
                        next;
                    }
                }
                if ($aut_name ne "all" && lc($name) !~ m/$aut_name/) {
                    print "Qualifier : '$qualif' isn't accepted for '$name' is only accepted for $aut_name\n";
                    next;
                }
            }
        }

        # Treatment of qualifiers
        $isPseudo = 1 if $qualif eq "pseudo";

        if ($qualif eq "trans-spliced") {
            $isTransSplice = 1;
        }
        elsif ($qualif eq "citation") {
            $haveRef = $value;
        }
        elsif ($qualif eq "first_aa") {
            my $first_aa_info = "(pos:$aa_start..$aa_end, aa : $value)";
            $CDS_toAdd .= "\t\t\ttransl_except\t$first_aa_info\n";
        }
        elsif ($qualif eq "substitution") {
            my $replacement = $value =~ /==>\s*(.+)/ ? lc($1) : "";
            $none_toAdd .= "\t\t\treplace\t$replacement\n";
        }
        elsif ($qualif eq "tandem") {
            $none_toAdd .= "\t\t\trpt_type\ttandem\n";
        }
        elsif ($qualif eq "stem_loop") {
            $Fkey = $qualif;
        }
        elsif ($qualif eq "repeat_element" || $qualif eq "repeat_region") {
            $Fkey = "repeat_region";
        }
        elsif ($class ne "note") {
            # 'nostart'/'nostop' only set the partial markers; every other
            # qualifier of this class is copied as a qualifier line.
            $noStart = "<" if $qualif eq "nostart";
            $noStop  = ">" if $qualif eq "nostop";
            if ($qualif ne "nostart" && $qualif ne "nostop") {
                $qualif  = "gene_synonym" if $qualif eq "synonym";
                $gene_toAdd .= "\t\t\t$qualif\t$value\n" if $forGene;
                $CDS_toAdd  .= "\t\t\t$qualif\t$value\n" if $forCDS;
            }
        }
        else {
            # Note class: the text comes from the table, with the first %s
            # replaced by the value of the qualifier.
            my $desc  = $OGMP_QUALS_TEXTS->{$qualif} || "";
            $desc     =~ s/\%s/$value/;
            push(@$CDS_notes,$desc)  if $forCDS;
            push(@$gene_notes,$desc) if $forGene;
            push(@$none_notes,$desc) if $forNone;
        }
    }

    $Fkey = "polyA_site" if ($type eq "S" && ($name =~ m/polyA/i || $name =~ m/polyadenylation/i));

    # If annot is intron or exon, the gene notes are carried by the CDS notes
    if ($type eq "I" || $type eq "E") {
        push(@$CDS_notes,@$gene_notes);
        $gene_notes   = [];
    }
    return ($gene_toAdd,$CDS_toAdd,$none_toAdd,$noStart,$noStop,$isTransSplice,$haveRef,$isPseudo,$Fkey,$CDS_notes,$gene_notes,$none_notes);
}

# Append a 'variation' feature to the feature table.
#
# Arguments:
#   $feature_tab      - feature table text accumulated so far
#   $noStart, $start  - partial marker and start coordinate
#   $g_noStop, $end   - partial marker and end coordinate
#   $a_name           - annotation name; the text following "var-" becomes
#                       the note "Variation <text>"
#   $none_qualifs     - ready-made qualifier lines (may be empty)
#   $notes            - array ref of note texts; the variation note is pushed
#                       onto it (may be undef)
#
# Returns the extended feature table.  The note qualifier is written only
# when there is at least one note, like the other note lines of this file.
sub TreatVar {
    my ($feature_tab,$noStart,$start,$g_noStop,$end,$a_name,$none_qualifs,$notes) = @_;

    $notes ||= [];

    $feature_tab .= $noStart.$start."\t".$g_noStop.$end."\tvariation\n";
    $feature_tab .= $none_qualifs if defined $none_qualifs;

    my $varName = $a_name =~ m/var-(.+)/i ? $1 : "";
    push(@$notes,"Variation $varName") if $varName ne "";

    my $notes_str = join("; ",@$notes);
    $feature_tab .= "\t\t\tnote\t$notes_str\n" if $notes_str ne "";
    return $feature_tab;
}

# Append a motif or signal feature to the feature table.
#
# Arguments:
#   $feature_tab      - feature table text accumulated so far
#   $noStart, $start  - partial marker and start coordinate
#   $g_noStop, $end   - partial marker and end coordinate
#   $a_name           - annotation name ("-F<n>" fragment information is
#                       removed; the text following "mot-" is the element name)
#   $none_qualifs     - ready-made qualifier lines (may be empty)
#   $FKey             - feature key; empty means "misc_feature"
#   $add              - copy suffix such as "_2" (may be empty)
#   $startline        - start line of the annotation (not used here)
#   $type             - annotation type; "S" marks a signal
#   $notes            - array ref of note texts, completed here (may be undef)
#
# Returns the extended feature table.  For a stem_loop only the element note
# is written; other features also get the copy number and the label.  The
# note qualifier is written only when there is at least one note.
sub TreatMotAndSig {
    my ($feature_tab,$noStart,$start,$g_noStop,$end,$a_name,$none_qualifs,$FKey,$add,$startline,$type,$notes) = @_;

    $notes ||= [];
    $add     = "" unless defined $add;
    $FKey    = "misc_feature" if !defined($FKey) || $FKey eq "";

    $a_name =~ s/-F\d+//;# remove fragment information
    $feature_tab .= $noStart.$start."\t".$g_noStop.$end."\t$FKey\n";

    # Add Elem name
    my $elemName = $a_name =~ m/mot-(.+)/i ? $1 : "";
    if (($FKey eq "repeat_region" || $FKey eq "misc_feature") && $type eq "S") {
        # For a signal the whole annotation name is the element name.
        $feature_tab .= "\t\t\trpt_family\tsignal $a_name\n" if $FKey eq "repeat_region";
        $elemName = $a_name;
    }
    if ($elemName) {
        $elemName =~ s/\s+$//;
        my $ToAdd = $FKey ne "stem_loop" ? "Motif element" : "Element";
        # The element name is left out of the note when qualifier lines exist.
        push(@$notes,$none_qualifs ? $ToAdd : "$ToAdd $elemName");
    }
    $feature_tab .= $none_qualifs if $none_qualifs;

    # Add cp number and label (not for stem loops)
    if ($FKey ne "stem_loop") {
        my $cp_num = $add || "";
           $cp_num =~ s/_//;
        push(@$notes,"Copy number $cp_num") if $cp_num ne "";
        my $label  = "${elemName}$add";
        push(@$notes,"label: $label") if $label ne "";
    }

    my $notes_str = join("; ",@$notes);
    $feature_tab .= "\t\t\tnote\t$notes_str\n" if $notes_str ne "";
    return $feature_tab;
}


# Collect the genes, exons, introns and citations of every fragment of a
# trans-spliced gene.
#
# Arguments:
#   $contig      - contig object, handed to ExtractAllPartOfTransSplice
#   $name        - gene name; annotations whose name does not match it (used
#                  as a pattern) are skipped
#   $who_overlap - group of annotations overlapping the current fragment
#
# Returns ($exons,$introns,$gene,$citation,$All_CDS_qualifs):
#   $exons    - { exon number => { start, end, type, qualifs } }
#   $introns  - { fragment suffix => { intron number => { start, end, qualifs } } }
#   $gene     - { fragment suffix => { start, end [, gene_qualifs] } }
#   $citation - { counter => { start, end, pubmedId } }
#   $All_CDS_qualifs - CDS qualifier lines of the first gene fragment seen
sub TreatTransSplice {
    my ($contig,$name,$who_overlap) = @_;

    my $all_fragment = &ExtractAllPartOfTransSplice($contig,$name,$who_overlap);
    my ($exons,$introns,$gene,$citation) = ({},{},{},{});
    my $count = 0;   # number of exons/introns stored so far
    my $All_CDS_qualifs = "";
    for my $fragment ( sort {$a cmp $b} keys %$all_fragment) {

        my $annot_who_overlap = $all_fragment->{$fragment};
        foreach my $annotation_who_overlap (@$annot_who_overlap){

            foreach my $feature_who_overlap (@$annotation_who_overlap) {
                my $annot     = $feature_who_overlap->[2];
                my $a_name    = $annot->get_genename();
                # NOTE(review): $name is interpolated as a regular expression;
                # confirm no gene name handled here holds regex metacharacters.
                next if $a_name !~ m/$name/;
                my $type      = $annot->get_type();
                my $start     = $annot->get_startpos();
                my $end       = $annot->get_endpos();
                my $startline = $annot->get_startline();

                my ($gene_qualifs,$CDS_qualifs,$none_qualifs,
                    $noStart,$noStop,
                    $isTransSplice,$haveRef,$isPseudo,$FKey,
                    $CDS_notes,$gene_notes,$none_notes)
                    = &TreatQualifiers($annot);

                # Fragment letter; empty when the start line has none.  A
                # plain assignment is used because "my ... if" has undefined
                # behaviour inside a loop.
                my $suffix = $startline =~ m#G-\S+_([a-z])# ? $1 : "";
                if ($haveRef) {
                    # NOTE(review): $count only grows for exons and introns,
                    # so citations met in between share a key and the last
                    # one wins -- confirm this is intended.
                    $citation->{$count} = { 'start' => $start, 'end' => $end, 'pubmedId' => $haveRef};
                }
                if ($type eq "G"){
                    if ($count == 0) {
                        # Only a gene fragment met before any exon/intron
                        # carries the gene qualifiers.
                        my $fusion_note   = join("; ",@$gene_notes) || undef;
                           $gene_qualifs .= "\t\t\tnote\t$fusion_note\n" if $fusion_note;
                        $gene->{"$suffix"} = { 'start' => $start, 'end' => $end, 'gene_qualifs' => $gene_qualifs};
                        $All_CDS_qualifs = $CDS_qualifs;
                    }
                    else {
                        $gene->{"$suffix"} = { 'start' => $start, 'end' => $end};
                    }
                }
                # Have exon or intron
                next if $type ne "E" && $type ne "I";
                my $fusion_note  = join("; ",@$CDS_notes) || undef;
                   $CDS_qualifs .= "\t\t\tnote\t$fusion_note\n" if $fusion_note;

                if ($type eq "E") {
                    next unless $startline =~ m#G-(\S+)_([a-z])-E(\d+)#;
                    my $exnum = $3;

                    $exons->{"$exnum"} = { 'start' => $start  , 'end'     => $end,
                                           'type'  => $type,    'qualifs' => $CDS_qualifs};
                }
                else {
                    next unless $startline =~ m#G-(\S+)_([a-z])-I(\d+)#;
                    my ($in_suffix,$innum) = ($2,$3);

                    $introns->{"$in_suffix"}->{"$innum"} = { 'start'   => $start, 'end'  => $end,
                                                             'qualifs' => $CDS_qualifs};
                }
                $count++;
            }
        }
    }
    return ($exons,$introns,$gene,$citation,$All_CDS_qualifs);
}

# Gather, for a trans-spliced gene, the annotations overlapping each of its
# fragments.
#
# Arguments:
#   $contig                 - contig object (get_annotations is called on it
#                             and it is handed to WhatOverlapsThis)
#   $name                   - gene name of the current fragment, optionally
#                             ending with "_<letter>"
#   $annotation_who_overlap - group of annotations of the current fragment
#
# Returns a hash ref keyed by fragment letter ("" when $name has none).  The
# entry of the current fragment is [$annotation_who_overlap]; every other
# gene annotation with the same base name and a different letter gets the
# result of WhatOverlapsThis for its interval.  Dies when two such fragments
# share a letter.
sub ExtractAllPartOfTransSplice {
    my ($contig,$name,$annotation_who_overlap) = @_;

    my $annotlist = $contig->get_annotations() || [];

    # Split "<base>_<letter>" into its two parts.
    my $suffix = "";
    if ($name =~ m/(.+)_([a-z])$/) {
        ($name,$suffix) = ($1,$2);
    }

    my $all_fragment = { $suffix => [$annotation_who_overlap] };

    foreach my $annot (@$annotlist) {
        my $a_name = $annot->get_genename();
        next unless $a_name;
        next unless $annot->get_type() eq "G";

        my ($a_start,$a_end) = ($annot->get_startpos(),$annot->get_endpos());

        my $a_suffix = "";
        if ($a_name =~ m/(.+)_([a-z])$/) {
            ($a_name,$a_suffix) = ($1,$2);
        }

        # Keep only the other fragments of the same gene.
        next if $name ne $a_name || $suffix eq $a_suffix;

        my $annot_who_overlap = &WhatOverlapsThis($a_start,$a_end,$contig);
        die " Problem with '$name' two fragment have same name\n"
            if $all_fragment->{$a_suffix};
        $all_fragment->{$a_suffix} = $annot_who_overlap;
    }
    return $all_fragment;
}

# Find the genes and signals overlapping a window of a contig, each with the
# exons and introns attached to it.
#
# Arguments:
#   $wstart, $wend - window (given in any order)
#   $contig        - contig object (get_sequencelength and get_annotations
#                    are called on it)
#
# Returns an array ref of groups.  A group is an array ref whose first entry
# describes a G or S annotation overlapping the window, followed by the E/I
# annotations of the same gene name overlapping that gene.  An entry is
#   [ lowest coordinate, highest coordinate, annotation, flag, position ]
# where flag is "YES" for a lone gene/signal and for the first and last exon
# (else ""), and position is "FIRST", "LAST" or "" (set for exons only).
sub WhatOverlapsThis {
    my ($wstart,$wend,$contig) = @_;

    my $contiglen  = $contig->get_sequencelength();
    my $annotlist  = $contig->get_annotations() || [];

    ($wstart, $wend) = ($wend, $wstart) if $wend < $wstart;

    # Genes come before their exons and introns: order by name, then by
    # kind of annotation, then by start position.
    my %rank_of    = ( G => 0, E => 1, I => 2, S => 3);
    my @candidates = sort { $a->get_genename() cmp $b->get_genename()
                                               or
                            $rank_of{$a->get_type()} <=> $rank_of{$b->get_type()}
                                               or
                            $a->get_startpos() <=> $b->get_startpos()
                          }
                     grep { $_->get_type() =~ m#^[GEIS]$# } @$annotlist;

    my @groups = ();
    foreach my $annotation (@candidates) {
        my $genename = $annotation->get_genename(); # Does NOT contain the _1
        my $type     = $annotation->get_type();
        my $start    = $annotation->get_startpos();
        my $end      = $annotation->get_endpos();
        my $dir      = $annotation->get_direction() || "==>";

        # Strandless interval
        my ($ostart,$oend) = $end < $start ? ($end,$start) : ($start,$end);
        my $entry = [ $ostart, $oend, $annotation ];

        # A gene or a signal overlapping the window opens a new group.
        if ($type eq "G" || $type eq "S") {
            push(@groups, [ $entry ])
                if &OverlappingRegions($contiglen,$start,$end,$dir,$wstart,$wend,">"); # Real dir of orf not important
            next;
        }

        # An exon or intron joins every group of its gene it overlaps (no
        # early exit, in case several genes carry the same name).
        foreach my $group (@groups) {
            my $leader = $group->[0]->[2];
            next unless $leader->get_genename() eq $genename;
            my $gstart = $leader->get_startpos();
            my $gend   = $leader->get_endpos();
            my $gdir   = $leader->get_direction();
            push(@$group, $entry)
                if &OverlappingRegions($contiglen,$start,$end,$dir,$gstart,$gend,$gdir);
        }
    }

    # Now, flag the first and last exon of each group
    foreach my $group (@groups) {
        my ($head,@members) = @$group;
        $head->[3] = "";
        if (!@members) {        # Gene with no introns
            $head->[3] = "YES";
            next;
        }

        my ($maxexnum,$maxexinfo) = (0,"");
        foreach my $info (@members) {
            $info->[3] = "";
            next unless $info->[2]->get_type() eq "E";
            my $startline = $info->[2]->get_startline() || "";
            next unless $startline =~ m#G-(\S+)-E(\d+)#;
            my $exnum = $2;
            if ($exnum == 1) {  # First exon
                $info->[3] = "YES";
                $info->[4] = "FIRST";
            }
            else {
                $info->[4] = "";
            }
            ($maxexnum,$maxexinfo) = ($exnum,$info) if $exnum > $maxexnum;
        }

        # The exon with the highest number is the last one of the group.
        if ($maxexinfo) {
            $maxexinfo->[3] = "YES";
            $maxexinfo->[4] = "LAST";
        }
    }
    return \@groups;
} # End sub

# Tell whether two regions of a sequence of length $seqlen overlap.
# Works even for circular genomes and regions that span the gap.
#
# Arguments:
#   $seqlen                - length of the sequence
#   $start1, $end1, $dir1  - first region; a direction containing ">" means
#                            forward, anything else means reverse
#   $start2, $end2, $dir2  - second region, same conventions
#
# A forward region with start > end, or a reverse region with start <= end,
# is taken as spanning the gap.
#
# Returns 1 when the regions overlap, 0 otherwise.
sub OverlappingRegions {
    my ($seqlen,$start1,$end1,$dir1,$start2,$end2,$dir2) = @_;

    # Turn a region into an ascending interval, pushing the part lying after
    # the gap beyond the end of the sequence.
    my $unroll = sub {
        my ($from,$to,$dir) = @_;
        my $forward  = ($dir =~ m#>#);
        my $spansgap = ($forward xor ($from <= $to)) ? 1 : 0;
        if ($spansgap) {
            if ($forward) {
                $to   += $seqlen;
            }
            else {
                $from += $seqlen;
            }
        }
        ($from,$to) = ($to,$from) if $from > $to;
        return ($from,$to,$spansgap);
    };

    my ($low1,$high1,$wraps1) = $unroll->($start1,$end1,$dir1);
    my ($low2,$high2,$wraps2) = $unroll->($start2,$end2,$dir2);

    my $touching = sub { return ($high1 >= $low2 && $low1 <= $high2) ? 1 : 0; };

    return 1 if $touching->();
    return 0 if $wraps1 == $wraps2; # same spanning situation: nothing more to try

    # Only one region spans the gap: shift the other one by a full turn and
    # compare again.
    if ($wraps1) {
        $low2  += $seqlen;
        $high2 += $seqlen;
    }
    else {
        $low1  += $seqlen;
        $high1 += $seqlen;
    }
    return $touching->();
} # End sub

# Append the REFERENCE feature of a citation annotation to the feature table.
#
# Arguments:
#   $annot       - citation annotation (its qualifiers are read through
#                  TreatQualifiers; get_startpos/get_endpos give the interval)
#   $feature_tab - feature table text accumulated so far
#
# Only the 'citation' qualifier is used: any other qualifier is reported on
# STDOUT and skipped, so that its value is never written as a PubMed id.
#
# Returns the extended feature table.
sub AddCitationInfo {
    my ($annot,$feature_tab) = @_;

    my $qualifs = &TreatQualifiers($annot,1);
    my $start   = $annot->get_startpos();
    my $end     = $annot->get_endpos();

    foreach my $qualif (sort keys %$qualifs) {
        if ($qualif ne "citation") {
            print "Only citation qualifiers is used for citation annotation\n";
            next;
        }
        $feature_tab .= "$start\t$end\tREFERENCE\n";
        $feature_tab .= "\t\t\tPubMed\t$qualifs->{$qualif}\n";
    }
    return $feature_tab;
}

# Append the 'gene' feature of a gene made of several fragments to the
# feature table: one coordinate line per fragment, in the order of the
# fragment keys.
#
# Arguments:
#   $gene        - { fragment key => { start, end [, gene_qualifs] } }
#   $name, $add, $ac - pieces concatenated to build the gene name
#   $feature_tab - feature table text accumulated so far
#   $g_noStart   - partial marker put on the start of the first fragment
#   $g_noStop    - partial marker put on the end of the last fragment
#   $notes       - array ref; the fragment description is pushed onto it
#
# The gene qualifiers are those of the first fragment.  A gene made of a
# single fragment is written on one line carrying both markers and the key
# (its interval is not repeated).  Nothing is added to the notes for an
# empty $gene.
#
# Returns the extended feature table.
sub AddGeneInfo {
    my ($gene,$name,$add,$ac,$feature_tab,$g_noStart,$g_noStop,$notes) = @_;

    my @frag_ids    = sort {$a cmp $b} keys %$gene;
    my $gene_qualif = "";
    my @frag_descs  = ();

    foreach my $idx (0 .. $#frag_ids) {
        my $num_p    = $frag_ids[$idx];
        my $start    = $gene->{$num_p}->{'start'};
        my $end      = $gene->{$num_p}->{'end'};
        my $is_first = ($idx == 0);
        my $is_last  = ($idx == $#frag_ids);

        $gene_qualif = $gene->{$num_p}->{'gene_qualifs'} if $is_first;
        push(@frag_descs,"fragment $num_p (pos:$start..$end)");

        # The key goes on the first line only; the partial markers go on the
        # outer ends of the whole feature only.
        $feature_tab .= ($is_first ? $g_noStart : "").$start."\t"
                      . ($is_last  ? $g_noStop  : "").$end
                      . ($is_first ? "\tgene"   : "")."\n";
        $feature_tab .= "\t\t\tgene\t$name$add$ac\n" if $is_last;
    }

    my $frag_list = join(", ",@frag_descs);
    push(@$notes,$frag_list) if $frag_list ne "";
    $feature_tab .= $gene_qualif if $gene_qualif;
    $feature_tab .= "\t\t\tnote\t$frag_list\n" if $frag_list ne "";
    return $feature_tab;
}

# Write the coordinate lines of a CDS made of exons and the matching 'exon'
# features.
#
# Arguments:
#   $exons        - { exon number => { start, end, qualifs, direction } };
#                   the bypass elements overlapping each exon are stored
#                   back under its 'annot_byp' key
#   $CDS_info     - CDS text accumulated so far
#   $EI_info      - exon/intron text accumulated so far
#   $g_noStart, $g_noStop - partial markers put on every coordinate line
#   $g_CDSqualifs - qualifier lines added after the last exon
#   $toAdd        - lines added after the last exon, before $g_CDSqualifs
#   $g_type       - feature key written on the first coordinate line
#   $annot_byp    - array ref of bypass annotations
#   $seq_length   - sequence length, used for the overlap test
#
# Returns ($CDS_info,$EI_info).
sub AddExonsInfo {
    my ($exons,$CDS_info,$EI_info,$g_noStart,$g_noStop,$g_CDSqualifs,$toAdd,$g_type,$annot_byp,$seq_length) = @_;

    my @exon_ids = sort {$a <=> $b} keys %$exons;

    # First pass: attach to every exon the bypass elements overlapping it.
    foreach my $id (@exon_ids) {
        my $exon = $exons->{$id};
        my @inside = ();
        foreach my $byp_elem (@$annot_byp) {
            push(@inside,$byp_elem)
                if &OverlappingRegions($seq_length,
                                       $byp_elem->get_startpos(),$byp_elem->get_endpos(),$byp_elem->get_direction(),
                                       $exon->{'start'},$exon->{'end'},$exon->{'direction'});
        }
        $exon->{'annot_byp'} = \@inside;
    }

    # Second pass: write the CDS intervals and the exon features.
    my $nb_exons = scalar(@exon_ids);
    my $rank     = 0;
    foreach my $id (@exon_ids) {
        $rank++;
        my $exon    = $exons->{$id};
        my $start   = $exon->{'start'};
        my $end     = $exon->{'end'};
        my $isMinus = $exon->{'direction'} eq "==>" ? 0 : 1;
        # Keys of the form "<letter>-<number>" are reduced to the number.
        my $number  = $id =~ m/[a-z]-(\d+)/ ? $1 : $id;

        if (@{ $exon->{'annot_byp'} }) {
            $CDS_info = &InsertCoordinateOfBypElement($exon->{'annot_byp'},$end,$start,$g_noStart,$end,$g_noStop,$g_type,$isMinus,$CDS_info,$number);
        }
        else {
            # Only the first interval carries the feature key.
            $CDS_info .= $g_noStart.$start."\t".$g_noStop.$end;
            $CDS_info .= "\t$g_type" if $rank == 1;
            $CDS_info .= "\n";
        }

        if ($rank == $nb_exons) {
            $CDS_info .= $toAdd;
            $CDS_info .= $g_CDSqualifs if $g_CDSqualifs;
        }

        $EI_info .= "$start\t$end\texon\n"."\t\t\tnumber\t$number\n";
        $EI_info .= $exon->{'qualifs'} if $exon->{'qualifs'};
    }
    return ($CDS_info,$EI_info);
}

# Append one 'intron' feature per intron to the exon/intron text, in
# increasing intron number.
#
# Arguments:
#   $introns  - { intron number => { start, end, qualifs } }
#   $CDS_info - CDS text, returned unchanged
#   $EI_info  - exon/intron text accumulated so far
#
# Returns ($CDS_info,$EI_info).
sub AddIntronsInfo {
    my ($introns,$CDS_info,$EI_info) = @_;

    for my $num_i (sort {$a <=> $b} keys %$introns) {
        my $intron = $introns->{$num_i};

        $EI_info .= join("\t",$intron->{'start'},$intron->{'end'},"intron")."\n";
        $EI_info .= "\t\t\tnumber\t$num_i\n";
        $EI_info .= $intron->{'qualifs'} if $intron->{'qualifs'};
    }
    return ($CDS_info,$EI_info);
}

# Append the 'intron' features of a trans-spliced gene to the exon/intron
# text.  An intron present in several fragments is written as one feature
# with one coordinate line per part.
#
# Arguments:
#   $introns       - { fragment suffix => { intron number => { start, end, qualifs } } }
#   $CDS_info      - CDS text, returned unchanged
#   $EI_info       - exon/intron text accumulated so far
#   $isTranspliced - accepted but not used
#
# Fragments are visited in suffix order and introns in numeric order; the
# number and the qualifiers written for an intron are those of its last part.
#
# Returns ($CDS_info,$EI_info).
sub AddIntronsInfoForTranspliced {
    my ($introns,$CDS_info,$EI_info,$isTranspliced) = @_;

    my @suffixes = sort {$a cmp $b} keys %$introns;

    # In order to know which introns have 2 parts.
    my %parts_of = ();
    foreach my $suff (@suffixes) {
        my $introns_num = $introns->{$suff};
        foreach my $num_i (keys %$introns_num) {
            my $number = $num_i =~ m/[a-z]-(\d+)/ ? $1 : $num_i;
            $parts_of{$number}++;
        }
    }

    # Complete $EI_info
    my $seen = 0;   # parts already written for the intron in progress
    foreach my $suff (@suffixes) {
        my $introns_num = $introns->{$suff};
        foreach my $num_i (sort {$a <=> $b} keys %$introns_num) {
            my $part  = $introns_num->{$num_i};
            my $start = $part->{'start'};
            my $end   = $part->{'end'};
            $seen++;

            # The first part opens the feature, the others only add a line.
            $EI_info .= $seen == 1 ? "$start\t$end\tintron\n" : "$start\t$end\n";

            # Close the feature once every part has been written.
            if ($seen == $parts_of{$num_i}) {
                $EI_info .= "\t\t\tnumber\t$num_i\n";
                $EI_info .= $part->{'qualifs'} if $part->{'qualifs'};
                $seen     = 0;
            }
        }
    }
    return ($CDS_info,$EI_info);
}

# Build the REFERENCE features of a set of citations.
#
# Argument:
#   $citations - { key => { start, end, pubmedId } }
#
# The citations are written in the string order of their keys.  Dies when a
# citation has no (or a false) pubmedId.
#
# Returns the text of the REFERENCE features.
sub AddCitationsInfo {
    my ($citations) = @_;

    my @lines = ();
    for my $num_c (sort {$a cmp $b} keys %$citations) {
        my $entry    = $citations->{$num_c};
        my $start    = $entry->{'start'};
        my $end      = $entry->{'end'};
        my $PubMedId = $entry->{'pubmedId'};
        die "Citation at pos $start..$end have no pubmedId\n" unless $PubMedId;

        push(@lines,"$start\t$end\tREFERENCE\n","\t\t\tPubMed\t$PubMedId\n");
    }
    return join("",@lines);
}

# Build the 'anticodon' qualifier of a tRNA gene.
#
# Arguments:
#   $annot  - tRNA annotation; its name must contain "trn<letter>" and its
#             start line "G-trn<letter>(<3 letters>)"
#   $contig - contig object; get_sequence() must return the sequence with the
#             anticodon enclosed between '!' characters, get_sequencelength()
#             the length used to mirror minus-strand coordinates
#
# The anticodon is searched on the strand of the gene, inside the gene, as
# "!<anticodon>!".  Problems are reported on STDOUT.
#
# Returns the qualifier line
#   "\t\t\tanticodon\t(pos:<start>..<end>,aa:<letter>)\n"
# or nothing when the anticodon cannot be located.
sub LocateAc {
    my ($annot,$contig) = @_;

    my $isMinus = ( $annot->get_direction eq "==>" ? 0 : 1);
    my $cg_len  = $contig->get_sequencelength();

    # Work on the strand of the gene.
    my $seq     = $contig->get_sequence();
       $seq     = uc($seq);
       $seq     =~ tr/ACGT/TGCA/ if $isMinus;
       $seq     = reverse $seq   if $isMinus;

    my $o_start = $annot->get_startpos();
    my $o_end   = $annot->get_endpos();
    my $name    = $annot->get_genename();

    # Plain assignments: "my ... if" has undefined behaviour and left these
    # variables undefined when the pattern did not match.
    my $ac_type = lc($name) =~ m/trn(.)/ ? $1 : "";
    print "Can't defined anticodon type for $name at pos : $o_start..$o_end\n" if !$ac_type;
    my $ac      = $annot->get_startline() =~ m/G-trn.\((...)\)/ ? lc($1) : "";
       $ac      =~ tr/u/t/;
    print "Can't defined anticodon sequence for $name at pos : $o_start..$o_end\n" if length($ac) != 3;

    # Reverse for minus strand
    my $start   = $isMinus ? $cg_len + 1 - $o_start : $o_start;
    my $end     = $isMinus ? $cg_len + 1 - $o_end   : $o_end;

    # Extract trna sequence.  The '!' characters are not counted in the
    # coordinates: move the offset of the gene forward until the number of
    # '!' found in front of it stops changing.
    my ($nb_ex,$start_full_seq) = (0,0);
    while (1) {
        $start_full_seq = $start + $nb_ex - 1;
        my $prev_nb_ex  = $nb_ex;
        my $seq_before_trna = substr($seq,0,$start_full_seq);
        $nb_ex = ($seq_before_trna =~ tr/!//);
        last if $nb_ex == $prev_nb_ex || $nb_ex == 0;
    }
    $start_full_seq++;

    # The gene is lengthened by the number of '!' found in its first
    # $trna_len characters.
    my $trna_len  = $end - $start + 1;
    my $trna_part = substr($seq,$start_full_seq-1,$trna_len);
    $nb_ex        = ($trna_part =~ tr/!//);
    my $trna_seq  = lc(substr($seq,$start_full_seq-1,$trna_len+$nb_ex));

    # Define AC position: the offset of the opening '!' is added to the start
    # of the gene.
    my $uc_ac = uc($ac);
    $trna_seq    =~ s/!+/!/g;
    my $ac_start = index(uc($trna_seq),"!$uc_ac!");
    if ($ac_start == -1) {
        print "Can't defined anticodon for $name at pos : $o_start..$o_end\n";
        return;
    }
    $ac_start  = $start + $ac_start;
    my $ac_end = $ac_start + 2;

    # Reverse for minus strand
    $ac_start = $cg_len + 1 - $ac_start if $isMinus;
    $ac_end   = $cg_len + 1 - $ac_end   if $isMinus;

    return ("\t\t\tanticodon\t(pos:$ac_start..$ac_end,aa:$ac_type)\n");
}

# RunTBL2ASN

# Run tbl2asn on the files of the temporary directory, then copy the results
# into the files named by DefineOutputName.
#
# Reads the globals $SUBINFO (COMMENT and STRAIN entries), $TBL2ASN,
# $AUTHORFILE, $TMPDIR, $MF_NAME_S, $DEBUG, $CREATEGB and $CREATETB.
#
# The Sequin file is always copied; the GenBank and table files only when
# $CREATEGB / $CREATETB equal 1.  Dies when a copy fails.  A failure of the
# tbl2asn command itself is reported on STDERR, so that a later copy error
# can be traced back to it.
sub RunTBL2ASN {
    my $comment = $SUBINFO->{'COMMENT'} || "";
    my $strain  = $SUBINFO->{'STRAIN'} || "";
    my $cmd  = "$TBL2ASN -U T -P T -k m -s T -V vb -t $AUTHORFILE";
       $cmd .= " -y \"$comment\""          if $comment;
       $cmd .= " -j  \"[strain=$strain]\"" if $strain;
       $cmd .= " -p $TMPDIR";
       print "$cmd\n" if $DEBUG;

    # system returns -1 when the command could not be started, otherwise the
    # wait status of the command.
    my $status = system("$cmd");
    if ($status == -1) {
        warn "Warning: cannot run '$TBL2ASN': $!\n";
    }
    elsif ($status != 0) {
        warn "Warning: '$TBL2ASN' ended with status ".($status >> 8)."\n";
    }

    my $sqnFile = "$TMPDIR/$MF_NAME_S.sqn";
    my $gbFile  = "$TMPDIR/$MF_NAME_S.gbf";
    my $tbFile  = "$TMPDIR/$MF_NAME_S.tbl";
    my ($sqnOutput,$gbOutput,$tbOutput) = &DefineOutputName();

    # Only the last component of each path is shown to the user.
    copy($sqnFile, $sqnOutput) or die "File '$sqnFile' cannot be copied in '$sqnOutput'.\n";
    my @sqnOut = split(/\//, $sqnOutput);
    $sqnOutput = $sqnOut[-1];
    print "\nDumping Sequin file in '$sqnOutput'\n";
    if ($CREATEGB == 1) {
        copy($gbFile,  $gbOutput)  or die "File '$gbFile' cannot be copied in '$gbOutput'.";
        my @gbOut = split(/\//, $gbOutput);
        $gbOutput = $gbOut[-1];
        print "Dumping GenBank file in '$gbOutput'\n";
    }
    if ($CREATETB == 1) {
        copy($tbFile,  $tbOutput)  or die "File '$tbFile' cannot be copied in '$tbOutput'.";
        my @tbOut = split(/\//, $tbOutput);
        $tbOutput = $tbOut[-1];
        print "Dumping Table file in '$tbOutput'\n";
    }
}

# Choose the names of the output files in the current working directory.
#
# For each of the suffixes .sqn, .gbf and .tbl the name is
# "<cwd>/$MF_NAME<suffix>"; when a file of that name already exists,
# "_1", "_2", ... is inserted before the suffix until a free name is found.
# The existence of the file is tested (not its readability), so that an
# existing file the user cannot read is never chosen and overwritten.
#
# Reads the global $MF_NAME.
# Returns ($sqnOutput,$gbOutput,$tbOutput).
sub DefineOutputName {
    my $current_wd  = getcwd();
    my $output_pref = "$current_wd/$MF_NAME";

    my @outputs = ();
    foreach my $output_suff (".sqn",".gbf",".tbl") {
        my $output = $output_pref.$output_suff;
        my $count  = 0;
        while (-e $output) {
            $count++;
            $output = $output_pref."_".$count.$output_suff;
        }
        push(@outputs,$output);
    }

    # Explicit list: same result as before in list and in scalar context.
    return ($outputs[0],$outputs[1],$outputs[2]);
}

# Find a program in the directories of the global @PATH.
#
# Argument:
#   $name_prog - name of the program file
#
# Returns the path of the first plain file of that name when it is readable
# and executable.  Dies when that file is not readable and executable, or
# when no directory of @PATH holds such a file.
sub GetPath {
    my $name_prog = shift;

    foreach my $dir (@PATH) {
        next unless -f "$dir/$name_prog";

        # "_" reuses the file information fetched by the -f test above.
        return "$dir/$name_prog" if -r _ && -x _;

        die "   -> ERROR: $name_prog is not readable and executable! Please run:\n",
            "             chmod 755 \"$dir/$name_prog\"\n";
    }
    die "-> ERROR: Could not find '$name_prog' in your search path. Please install\n",
        "   $name_prog from the source (see INSTALL.txt).\n";
}