/* Top-level rule: the "#NEXUS" header token followed by a list of blocks
   (the list may be empty, see the blocks rule). */
nexus -->
    "#NEXUS" blocks
  .


/* Zero or more blocks, written as a right-recursive list; the empty
   alternative terminates the list. */
blocks -->   
    block blocks
  |
  .


/* A single block: "begin", the block name with its commands, then the
   closing keyword followed by the terminating semicolon ("end;" or
   "endblock;").  Every command inside a block_declaration already carries
   its own ";", so no extra semicolon is expected before the end keyword. */
block -->
    "begin"
      block_declaration
    end ";"
  .

end -->
    "end"
  | "endblock"
  . 

/* Block name plus block body.  One alternative per known block type; the
   last alternative, block_undefined, is the fallback rule defined right
   below. */
block_declaration -->
    block_taxa
  | block_characters
  | block_unaligned
  | block_distances
  | block_data
  | block_codons
  | block_sets
  | block_assumptions
  | block_trees
  | block_notes
  | block_undefined
  .

/* Body of a block that none of the specific block_* rules describes.
   Both closing keywords accepted by the end rule are excluded. */
block_undefined -->
  Any sequence of tokens that does not contain "end" or "endblock"


/*----------------------------------------------------------------------*/
/*               TAXA                                                   */
/*----------------------------------------------------------------------*/


/* TAXA block: the block name, a dimensions command giving ntax, and the
   taxlabels command. */
block_taxa -->
    "taxa" ";"
    "dimensions" "ntax" "=" positive_integer ";"
    taxlabels
  .


/*----------------------------------------------------------------------*/
/*               CHARACTERS                                             */
/*----------------------------------------------------------------------*/

/* DATA block.  Same command sequence as block_characters, except that the
   dimensions command uses newtaxa (ntax is mandatory) where
   block_characters uses newtaxa_optional. */
block_data -->   
    "data" ";" 
       "dimensions" 
          newtaxa 
          "nchar" "=" positive_integer 
       ";"
       format_characters
       options_data
       eliminate
       taxlabels_optional
       charstate
       "matrix" matrix_data ";" 
  .

/* CHARACTERS block: a dimensions command (ntax optional, nchar mandatory),
   followed by the format, options, eliminate, taxlabels and label commands
   (each of those rules has an empty alternative) and the mandatory matrix
   command. */
block_characters -->   
    "characters" ";" 
       "dimensions" 
          newtaxa_optional  
          "nchar" "=" positive_integer 
       ";"
       format_characters
       options_data
       eliminate
       taxlabels_optional
       charstate
       "matrix" matrix_data ";"
  .

format_characters -->   
   "format"
      format_characters_list
   ";"
  |
  .

format_characters_list -->
    format_characters_item format_characters_list
  |
  .

/* One option of the format command of a DATA/CHARACTERS block.  Every
   alternative is non-empty: format_characters_list already provides the
   empty case, so a nullable item would make that list ambiguous. */
format_characters_item -->
    "gap" "=" character_symbol
  | "matchchar" "=" matchchar_symbol
  | "transpose"
  | "items" "=" item_value
  | "datatype" "=" datatype_characters_options
  | "respectcase"
  | "interleave"
  | "statesformat" "=" statesformat_option
  | missing
  | symbols
  | equate
  | labels
  | tokens
  .


datatype_characters_options -->   
    "standard"
  | "dna"
  | "rna"
  | "nucleotide"
  | "protein"
  | "continuous"
  .


/* Value of the "items" format option: either a single item_option or a
   parenthesized list of one or more of them. */
item_value -->
     "(" item_option item_option_list ")"
  |  item_option
  .

/* Keywords accepted as a value of the "items" format option. */
item_option -->
    "min"
  | "max"
  | "median"
  | "average"
  | "variance"
  | "stderror"
  | "samplesize"
  | "states"
  .

/* Possibly empty sequence of item_option (single empty alternative). */
item_option_list -->
    item_option item_option_list
  |
  .


statesformat_option -->
    "statespresent"
  | "individuals"
  | "count"
  | "frequency"
  .

options_data -->
    "options" options_data_command_list ";"
  |
  .

/* Possibly empty list of the sub-commands accepted by the options command
   of a DATA/CHARACTERS block; the elements are options_data_command, not
   the options_command rule of the ASSUMPTIONS block. */
options_data_command_list -->
    options_data_command options_data_command_list
  |
  .

options_data_command -->
    ignore
  | mstaxa
  | zap
  | gapmode
  .

ignore -->
    "ignore" "=" ignore_option
  .

ignore_option -->
    "invar"
  | "uninform"
  .

mstaxa -->
    "mstaxa" "=" mstaxa_option
  .

mstaxa_option -->
    "uncertain"
  | "polymorph"
  | "variable"
  .

zap -->
    "zap" "=" "\"" set "\""
  .

eliminate -->
    "eliminate" positive_integer "-" positive_integer ";"
  |
  .

charstate -->
    charstatelabels
  | charlabels statelabels
  |
  .

charstatelabels -->
    "charstatelabels" charstatelabel_list
  .

charstatelabel_list --> 
    charstatelabel_item charstatelabel_list_rest 
  .

charstatelabel_list_rest -->
    "," charstatelabel_list
  | ";"
  .

charstatelabel_item -->
    positive_integer character_name state_name
  .

character_name -->
    identifier 
  |
  .

state_name -->
    "/" reference reference_list
  |  
  .

charlabels -->
    "charlabels" identifier charlabels_list ";"
  |
  .

charlabels_list -->
    identifier charlabels_list
  |
  .

statelabels -->
    "statelabels" statelabels_list
  |   
  .

statelabels_list -->
     statelabels_item statelabels_list_rest
  .

statelabels_list_rest -->
    "," statelabels_list
  | ";"
  .

statelabels_item -->
    positive_integer reference reference_list
  .


/*----------------------------------------------------------------------*/
/*               UNALIGNED                                              */
/*----------------------------------------------------------------------*/


block_unaligned -->
    "unaligned" ";"
       dimensions_unaligned
       format_unaligned
       taxlabels_optional
       "matrix" matrix_data ";"
  .

dimensions_unaligned -->
   "dimensions"
      newtaxa
   ";"
  |
  .

format_unaligned -->
    "format"
       format_unaligned_list
    ";"
  |
  .

format_unaligned_list -->
    format_unaligned_item format_unaligned_list
  |
  .

/* One option of the format command of an UNALIGNED block.  The options are
   alternatives (format_unaligned_list repeats them), not a fixed
   sequence. */
format_unaligned_item -->
    datatype_unaligned
  | "respectcase"
  | missing
  | symbols
  | equate
  | labels
  .

datatype_unaligned -->
    "datatype" "=" datatype_unaligned_options
  .

datatype_unaligned_options -->
    "standard"
  | "dna"
  | "rna"
  | "nucleotide"
  | "protein"
  .


/*----------------------------------------------------------------------*/
/*               DISTANCES                                              */
/*----------------------------------------------------------------------*/


block_distances -->
    "distances" ";"
      dimensions_distances
      format_distances
      taxlabels_optional
      "matrix" matrix_data ";"
  .

/* Optional dimensions command of a DISTANCES block.  The rule is closed
   with "." like every other rule in this grammar. */
dimensions_distances -->
    "dimensions"
       newtaxa
       nchar
    ";"
  |
  .

nchar -->
    "nchar" "=" positive_integer
  |
  .

format_distances -->
    "format"
       format_distances_list
    ";"
  |
  .

format_distances_list -->
    format_distances_item format_distances_list
  |
  .

/* One option of the format command of a DISTANCES block.  No empty
   alternative: format_distances_list already covers the empty case. */
format_distances_item -->
    triangle
  | diagonal
  | labels
  | missing
  | "interleave"
  .

triangle -->
    "triangle" "=" triangle_option
  .

triangle_option -->
    "lower"
  | "upper"
  | "both"
  .

diagonal -->
    "diagonal"
  | "nodiagonal"
  .


/*----------------------------------------------------------------------*/
/*               CODONS                                                 */
/*----------------------------------------------------------------------*/


block_codons -->
    "codons" ";"
       codonposset
       geneticcode
       codeset
  .

codonposset -->
    "codonposset" star identifier codonposet_rest ";"
  |
  .

codonposet_rest -->
    "=" codonposed_standard
  | "(" codonposet_format
  .

/* Explicit format of a codonposset definition, entered after the opening
   "(" consumed by codonposet_rest.  The standard form uses the same
   codonposed_standard rule as the unformatted "=" alternative. */
codonposet_format -->
    "standard" ")" "=" codonposed_standard
  | "vector" ")" "=" definition_tokens_vector
  .

codonposed_standard -->
    "n" ":" set ","
    "1" ":" set ","
    "2" ":" set ","
    "3" ":" set
  .

geneticcode -->
      "geneticcode" identifier
         geneticcode_option_list
         "=" geneticcode_description
      ";"
   |
   .

geneticcode_option_list -->
      "(" geneticcode_option geneticcode_option_sequence ")"
   |
   .

geneticcode_option_sequence -->
      geneticcode_option geneticcode_option_sequence
   |
   .

geneticcode_option -->
     "codeorder" "=" positive_integer
   | "nucorder" "=" identifier
   | tokens
   | "extensions" "=" "\"" identifier_list "\""        
   .

geneticcode_description -->
     geneticcode_list "\n" geneticcode_list
   | ";"
   .

geneticcode_list -->
     geneticcode_symbol geneticcode_list
   |
   .

codeset -->
     "codeset" star identifier codeset_type "=" codename_list ";"
   |
   .   

codeset_type -->
   "(" codeset_type_name ")"
   |
   .

codeset_type_name -->
     "characters"
   | "unaligned"
   | "taxa"
   .

codename_list -->
     codename codename_list
   |
   .

codename -->
     identifier ":" set
   .


/*----------------------------------------------------------------------*/
/*           SETS                                                       */
/*----------------------------------------------------------------------*/


block_sets -->
    "sets" ";"
     set_command_list
  .

set_command_list -->
    set_command set_command_list
  |
  .

/* One command of the SETS block.  No empty alternative:
   set_command_list already covers the empty case. */
set_command -->
    charset
  | stateset
  | changeset
  | taxset
  | treeset
  | charpartition
  | taxpartition
  | treepartition
  .

stateset -->
    "stateset" identifier block_set_definition ";"
  .

changeset -->
    "changeset" identifier "=" change_set ";"
  .

treeset -->
    "treeset" identifier block_set_definition ";"
  .

charpartition -->
    "charpartition" identifier definition ";"
  .

taxpartition -->
    "taxpartition" identifier definition ";"
  .

treepartition -->
    "treepartition" identifier definition ";"
  .


change_set -->
     change_item changeset_direction change_item change_set
   |
   .

change_item -->
     identifier
   | state_symbol
   | "(" state_set ")"
   .

changeset_direction -->
     "<" "-" ">"
   | "-" ">"
   .


/*----------------------------------------------------------------------*/
/*           ASSUMPTIONS                                                */
/*----------------------------------------------------------------------*/


/* ASSUMPTIONS block: the block name followed by a possibly empty list of
   commands, each of which ends with its own ";". */
block_assumptions -->
    "assumptions" ";"
       assumptions_command_list
  .


assumptions_command_list -->
    assumptions_command assumptions_command_list
  |
  .

assumptions_command -->
    options
  | usertype
  | typeset
  | wtset
  | exset
  | ancstates
  | taxset
  | charset
  .

options -->
     "options" options_command_list ";"
  .

usertype -->
     "usertype" identifier "(" usertype_definition ";"
   .

typeset -->
     "typeset" star identifier definition_tokens ";"
   .

wtset -->
     "wtset" star identifier definition ";"
   .

exset -->
      "exset" star identifier definition_notokens ";"
   .

ancstates -->
      "ancstates" star identifier definition_standard ";"
   .


options_command_list -->
    options_command options_command_list
  |
  .

/* One sub-command of the options command of the ASSUMPTIONS block.  The
   second alternative is the polycount rule defined below. */
options_command -->
    deftype
  | polycount
  | gapmode
  .

deftype -->
     "deftype" "=" identifier
   .

/* POLYTCOUNT sub-command of options.  The keyword in a NEXUS file is
   "polytcount"; its value is one of the polycount_type keywords. */
polycount -->
     "polytcount" "=" polycount_type
   .

polycount_type -->
     "minsteps"
   | "maxsteps"
   .


usertype_definition -->
     "stepmatrix" ")" "=" positive_integer "\n" state_word_list stepmatrix_row_list
   | "cstree" ")" "=" cstree
   .

stepmatrix_row_list -->
     "\n" stepmatrix_row stepmatrix_row_list
   |
   .

stepmatrix_row -->
     stepmatrix_item stepmatrix_row
   |
   .

stepmatrix_item -->
     numeric
   | "."
   | "i"
   .
 
cstree -->
     "(" cstree cstree_list ")" state_symbol_optional 
   |  state_symbol
   .

cstree_list -->
     "," cstree cstree_list
  |
  .


/*----------------------------------------------------------------------*/
/*           TREES                                                      */
/*----------------------------------------------------------------------*/


block_trees -->
    "trees" ";"
       translate
       tree      
  .

translate -->
    "translate" reference reference translate_list ";"
  |
  .

translate_list -->
    "," reference reference translate_list 
  |
  .

tree -->
    "tree" tree_rest
  | "utree" tree_rest
  |
  .

tree_rest -->
    star identifier "=" root tree_definition ";" tree
  .

tree_definition -->
    "(" tree_definition tree_list ")" tree_label_optional
  |  tree_label
  .

/* Optional "[&R]" / "[&U]" marker that precedes the tree description in
   tree_rest (single empty alternative). */
root -->
    "[&R]"
  | "[&U]"
  |
  .

tree_list -->
     "," tree_definition tree_list
  |
  .

tree_label -->
    identifier length
  | numeric length
  .

tree_label_optional -->
    tree_label
  |
  .

length -->
    ":" numeric
  |
  .


/*----------------------------------------------------------------------*/
/*           NOTES                                                      */
/*----------------------------------------------------------------------*/


block_notes -->   
    "notes" ";"
      block_notes_command_list
  .

/* Possibly empty list of NOTES commands.  The rule name matches the
   spelling used by block_notes and by the recursive reference below. */
block_notes_command_list -->
    block_notes_command block_notes_command_list
  |
  .

block_notes_command -->
    text
  | picture
  .

/* TEXT command of the NOTES block.  The command itself is not nullable:
   block_notes_command_list already provides the empty case, and a nullable
   list element would make that list ambiguous. */
text -->
    "text" text_options "text" "=" identifier ";"
  .

text_options -->
    taxon
    character
    state
    tree_set
    source
  .

taxon -->
    "taxon" "=" set_description
  |
  . 

character -->
    "character" "=" set_description
  |
  .

state -->
    "state" "=" set_description
  |
  .

tree_set -->
    "tree" "=" set_description
  |
  .

set_description -->
    reference
  | "(" set ")"
  .

source -->
   "source" "=" source_option
  |
  .

source_option -->
    "inline"
  | "file"
  | "resource"
  .

/* PICTURE command of the NOTES block.  The command itself is not nullable:
   block_notes_command_list already provides the empty case, and a nullable
   list element would make that list ambiguous. */
picture -->
    "picture"
       picture_options
       "source" "=" source_option
       "picture" "=" identifier
    ";"
  .

/* Options of the PICTURE command.  Written as a sequence, like
   text_options: each referenced rule has its own empty alternative, so
   any subset of the options may be present. */
picture_options -->
    taxon
    character
    state
    tree_set
    format
    encode
  .

format -->
    "format" "=" format_option
  |
  .

encode -->
    "encode" "=" encode_option
  |
  .
   
/* source_option ("inline" | "file" | "resource") is defined once, right
   after the source rule earlier in this section; the identical second
   definition that used to stand here was removed. */
  
format_option -->
    "pict"
  | "tiff"
  | "eps"
  | "jpeg"
  | "gif"
  .

encode_option -->
    "none"
  | "uuencode"
  | "binhex"
  .


/*----------------------------------------------------------------------*/
/*           COMMENTS                                                   */
/*----------------------------------------------------------------------*/


comment -->
   command_comment
 | text_comment
 .

/* A bracketed comment.  text_comment_content may be empty, so "[]" is
   accepted without an empty alternative on the comment itself. */
text_comment -->
   "[" text_comment_content "]"
 .

/* Content of a bracketed comment: any mix of nested comments and
   non-bracket tokens.  A nested comment may be followed by further
   content, e.g. "[a [b] c]". */
text_comment_content -->
   text_comment text_comment_content
 | no_bracket_token text_comment_content
 |
 .

/* Token allowed inside a comment.  Named as referenced by
   text_comment_content; "[" is excluded as well because an opening bracket
   starts a nested text_comment. */
no_bracket_token -->
   Any token except "[" or "]"
 .

/* Command comments: a backslash followed by one letter, inside square
   brackets.  The backslash terminal is written "\\" because quoted
   terminals in this grammar use backslash escapes ("\n", "\""). */
command_comment -->
    "[" "\\" "i" "]"
  | "[" "\\" "b" "]"
  | "[" "\\" "u" "]"
  | "[" "\\" "p" "]"
  .


/*----------------------------------------------------------------------*/
/*           COMMON                                                     */
/*----------------------------------------------------------------------*/


/* ----------------------------------------------------------------------------
   Formatted object definition rules used by object definition commands 
   ---------------------------------------------------------------------------- */

definition -->
    "(" format_tokens_standard
  | "=" definition_tokens_standard
  .

definition_tokens -->
    "(" definition_tokens_rest
  | "=" definition_tokens_standard
  .

definition_tokens_rest -->
    "standard" ")" "=" definition_tokens_standard
  | "vector" ")" "=" definition_tokens_vector
  .


definition_notokens -->
    "(" definition_notokens_rest
  | "=" definition_notokens_standard
  .

definition_notokens_rest -->
    "standard" ")" "=" definition_notokens_standard
  | "vector" ")" "=" definition_notokens_vector
  .


definition_standard -->
    "(" format_unknown_standard
  | "=" definition_unknown_standard
  . 

format_unknown_standard -->
    "tokens" format_tokens_standard 
  | "notokens" format_notokens_standard
  | "standard" format_unknown_standard
  | "vector" format_unknown_vector
  | ")" "=" definition_unknown_standard
  .

format_unknown_vector -->
    "tokens" format_tokens_vector 
  | "notokens" format_notokens_vector
  | "standard" format_unknown_standard
  | "vector" format_unknown_vector
  | ")" "=" definition_unknown_vector
  .

format_tokens_standard -->
    "tokens" format_tokens_standard 
  | "notokens" format_notokens_standard
  | "standard" format_tokens_standard
  | "vector" format_tokens_vector
  | ")" "=" definition_tokens_standard
  .

format_notokens_standard -->
    "tokens" format_tokens_standard 
  | "notokens" format_notokens_standard
  | "standard" format_notokens_standard
  | "vector" format_notokens_vector
  | ")" "=" definition_notokens_standard
  .

format_tokens_vector -->
    "tokens" format_tokens_vector 
  | "notokens" format_notokens_vector
  | "standard" format_tokens_standard
  | "vector" format_tokens_vector
  | ")" "=" definition_tokens_vector
  .

format_notokens_vector -->
    "tokens" format_tokens_vector 
  | "notokens" format_notokens_vector
  | "standard" format_notokens_standard
  | "vector" format_notokens_vector
  | ")" "=" definition_notokens_vector
  .

definition_tokens_standard -->
    reference ":" set token_standard_list
  .

definition_notokens_standard -->
    set
  .

definition_tokens_vector -->
    reference_list
  .

definition_notokens_vector -->
    nonsemicolon_word_list
  .

definition_unknown_standard -->
    nonsemicolon_word_list
  .

definition_unknown_vector -->
    nonsemicolon_word_list
  .

token_standard_list -->
    "," reference ":" set token_standard_list
  |
  .

/* Possibly empty sequence of nonsemicolon_word. */
nonsemicolon_word_list -->
    nonsemicolon_word nonsemicolon_word_list
  |
  .


/* ----------------------------------------------------------------------------
   Matrix definition rules 
   ---------------------------------------------------------------------------- */

matrix_data -->
    identifier matrix_entry_list matrix_data_rest
  |
  . 

matrix_data_rest -->
    "\n" matrix_data
  |
  .

matrix_entry_list -->
    state_word matrix_entry_list
  | "(" state_composed_word state_composed_list ")" matrix_entry_list  
  | "{" state_composed_word state_composed_list "}" matrix_entry_list
  |
  .

state_composed_word -->
    state_complex_word 
  | state_complex_word ":" state_complex_word 
  .
  
state_complex_word -->
     positive_integer
  |  state_word
  .

state_composed_list -->
    state_composed_word state_composed_list
  |
  .

/* ----------------------------------------------------------------------------
   Equate definition rules 
   ---------------------------------------------------------------------------- */

equate -->   
    "equate" "=" "\"" equate_element equate_list "\""
  .

equate_list -->   
     equate_element equate_list
   |
   .

equate_element -->   
     equate_symbol "=" equate_element_definition
   .

equate_element_definition -->
     "(" equate_word_list ")"
   | equate_symbol
   .  

equate_word_list -->
     equate_word equate_word_list
  |
  .


/* ----------------------------------------------------------------------------
   Set definition rules 
   ---------------------------------------------------------------------------- */

set -->
    set_item  set
  |
  .

/* One element of a set: the keyword "all" with an optional period, the
   keyword "remainder", or a reference optionally extended to a range. */
set_item -->
    "all" set_period
  | "remainder"
  |  reference set_item_range
  .

set_item_range -->
    "-" set_item_range_identifier set_period
  |
  .

/* Optional period of a set range: a backslash followed by a positive
   integer.  The backslash terminal is written "\\" because quoted
   terminals in this grammar use backslash escapes ("\n", "\""). */
set_period -->
    "\\" positive_integer
  |
  .

set_item_range_identifier -->
    "."
  | reference
  .


/* ----------------------------------------------------------------------------
   Individual rules used by various commands or blocks
   ---------------------------------------------------------------------------- */

block_set_definition -->
     "=" set
   | "(" block_set_format
   .

block_set_format -->
     "standard" ")" "=" set
   | "vector" ")" "=" binary_word_list
   .

charset -->
    "charset" identifier block_set_definition ";"
  .

gapmode -->
    "gapmode" "=" gapmode_type
   .

gapmode_type -->
     "missing"
   | "newstate"
   .

identifier_list -->   
    identifier identifier_list
  | 
  .

labels -->
    "labels"
  | "nolabels"
  .

missing -->  
    "missing" "=" character_symbol
  .

newtaxa -->
    "newtaxa" ntax
  | ntax
  .

newtaxa_optional -->   
    newtaxa
  |
  .

/* The ntax setting of a dimensions command; its value is the
   positive_integer token. */
ntax -->
    "ntax" "=" positive_integer
  .

reference -->
    positive_integer
  | identifier
  .

reference_list -->
    reference reference_list
  |
  .
 
star -->
    "*"
  |
  .

state_symbol_optional -->
    state_symbol
  |
  .

symbols -->   
    "symbols" "=" "\"" state_symbol_list "\""
  .

state_word_list -->
    state_word state_word_list
  | 
  .

taxlabels -->
    "taxlabels" identifier identifier_list ";"
  .

taxlabels_optional -->
    taxlabels
  |
  .

taxset -->
    "taxset" identifier block_set_definition ";"
  .

tokens -->
    "tokens"
  | "notokens"
  .

/* ----------------------------------------------------------------------------
   Special token matching rules
   ---------------------------------------------------------------------------- */
equate_word -->
   A token composed of equate symbols

state_word -->
   A token composed of state symbols

binary_word_list -->
   List of words where each word is composed only of the digits 0 and 1.

identifier -->
  A token satisfying the regular expression [_\w]+[\d\w\._]*. Note that a single
  _ is considered a valid identifier. In most contexts a single _ means a
  "don't care" identifier, similar to the meaning of _ in Prolog.

numeric -->
   A number on any format, integer or real.

positive_integer -->
  An integer greater than 0. Must satisfy the regular expression [1-9][\d]*

equate_symbol -->
  Any character except any of the following: \n()[]{}/\,;:*`'"<>^ or any
  character currently defined as missing, gap, matchchar, or symbol.

character_symbol -->
  Any character except any of the following: \n\s()[]{}<>/\,;:=*^'"

matchchar_symbol -->
  Any symbol except any of the following: \n\s()[]{}/\,;:=*'"`<>^ 

state_symbol -->
  Any symbol except any of the following: \n()[]{}<>/\,;:=*'"`~
  Space symbols in a state_symbol_string are ignored. For example, " 1 2 3 "
  is equivalent to "123".

geneticcode_symbol -->
  An identifier or a number


/*----------------------------------------------------------------------*/
/*           ID                                                         */
/*----------------------------------------------------------------------*/


/* 
   This is a context-free grammar describing an item in NEXUS. An item is a
   syntactic element. Intuitively, an item can be a single token, a single
   comment, or a combination of tokens and comments. The term "item" arose
   because in NEXUS a single syntactic element can be interleaved with lists
   of comments anywhere, that is, in the middle, at the beginning, or at the
   end. Moreover, comments can be nested. Therefore a simple deterministic
   finite automaton is not enough to recognize a syntactic element of NEXUS.
   That is why we use the following grammar to recognize items in a NEXUS
   file.
*/


item_list -->
    item item_list
  | end_of_file
  .

item -->
    comment comment_rest
  | token token_rest
  .

/* What may follow a comment inside an item: another comment, a token, a
   closing space, or nothing.  The empty alternative is written last, as
   in the rest of this grammar. */
comment_rest -->
    comment comment_rest
  | token comment_rest
  | space
  |
  .

/* What may follow the leading token of an item: a comment (then
   comment_rest), a closing space, or nothing.  The empty alternative is
   written last, as in the rest of this grammar. */
token_rest -->
    comment comment_rest
  | space
  |
  .

comment -->
   "[" comment_token_list "]"
  .

/* Content of a comment: any mix of comment tokens and nested comments.
   A nested comment may be followed by further content, e.g. "[a [b] c]". */
comment_token_list -->
    comment_token comment_token_list
  | comment comment_token_list
  |
  .

comment_token -->
    Any token except the square brackets, that is, except "[" or "]".

space -->
    Any sequence of characters defined as a BLANK CHARACTER in NEXUS.

end_of_file -->
    The end of file token