/* A NEXUS file is the "#NEXUS" marker followed by zero or more blocks. */
nexus --> "#NEXUS" blocks .

blocks --> block blocks
    | .

block --> "begin" block_declaration ";" end .

end --> "end"
    | "endblock" .

block_declaration --> block_taxa
    | block_characters
    | block_unaligned
    | block_distances
    | block_data
    | block_codons
    | block_sets
    | block_assumptions
    | block_trees
    | block_notes
    | block_unknown .

/* Prose rule: last alternative of block_declaration. */
block_unknown --> Any token except "end" .

/*----------------------------------------------------------------------*/
/* TAXA */
/*----------------------------------------------------------------------*/

block_taxa --> "taxa" ";" "dimensions" "ntax" "=" positive_integer ";" taxlabels .

/*----------------------------------------------------------------------*/
/* CHARACTERS */
/*----------------------------------------------------------------------*/

/* block_data and block_characters share the same body; apart from the
   block keyword, block_data requires newtaxa where block_characters
   accepts newtaxa_optional. */
block_data --> "data" ";"
    "dimensions" newtaxa "nchar" "=" positive_integer ";"
    format_characters options_data eliminate taxlabels_optional charstate
    "matrix" matrix_data ";" .

block_characters --> "characters" ";"
    "dimensions" newtaxa_optional "nchar" "=" positive_integer ";"
    format_characters options_data eliminate taxlabels_optional charstate
    "matrix" matrix_data ";" .

format_characters --> "format" format_characters_list ";"
    | .

format_characters_list --> format_characters_item format_characters_list
    | .

format_characters_item -->
    | "gap" "=" character_symbol
    | "matchchar" "=" matchchar_symbol
    | "transpose"
    | "items" "=" item_value
    | "datatype" "=" datatype_characters_options
    | "respectcase"
    | "interleave"
    | "statesformat" "=" statesformat_option
    | missing
    | symbols
    | equate
    | labels
    | tokens .

datatype_characters_options --> "standard"
    | "dna"
    | "rna"
    | "nucleotide"
    | "protein"
    | "continuous" .

/* Either a single item option or a parenthesised list of one or more. */
item_value --> "(" item_option item_option_list ")"
    | item_option .

item_option --> "min"
    | "max"
    | "median"
    | "average"
    | "variance"
    | "stderror"
    | "samplesize"
    | "states" .

item_option_list -->
    | item_option item_option_list
    | .
statesformat_option --> "statespresent"
    | "individuals"
    | "count"
    | "frequency" .

/* Optional "options" command of the data/characters blocks. */
options_data --> "options" options_data_command_list ";"
    | .

options_data_command_list --> options_data_command options_data_command_list
    | .

options_data_command --> ignore
    | mstaxa
    | zap
    | gapmode .

ignore --> "ignore" "=" ignore_option .

ignore_option --> "invar"
    | "uninform" .

mstaxa --> "mstaxa" "=" mstaxa_option .

mstaxa_option --> "uncertain"
    | "polymorph"
    | "variable" .

/* The set is written between literal double quotes. */
zap --> "zap" "=" "\"" set "\"" .

eliminate --> "eliminate" positive_integer "-" positive_integer ";"
    | .

/* Character/state labelling: either one charstatelabels command, or
   charlabels followed by statelabels, or nothing. */
charstate --> charstatelabels
    | charlabels statelabels
    | .

charstatelabels --> "charstatelabels" charstatelabel_list .

/* Comma-separated items; the list is closed by ";". */
charstatelabel_list --> charstatelabel_item charstatelabel_list_rest .

charstatelabel_list_rest --> "," charstatelabel_list
    | ";" .

charstatelabel_item --> positive_integer character_name state_name .

character_name --> identifier
    | .

state_name --> "/" reference reference_list
    | .

charlabels --> "charlabels" identifier charlabels_list ";"
    | .

charlabels_list --> identifier charlabels_list
    | .

statelabels --> "statelabels" statelabels_list
    | .

/* Comma-separated items; the list is closed by ";". */
statelabels_list --> statelabels_item statelabels_list_rest .

statelabels_list_rest --> "," statelabels_list
    | ";" .

statelabels_item --> positive_integer reference reference_list .

/*----------------------------------------------------------------------*/
/* UNALIGNED */
/*----------------------------------------------------------------------*/

block_unaligned --> "unaligned" ";"
    dimensions_unaligned format_unaligned taxlabels_optional
    "matrix" matrix_data ";" .

dimensions_unaligned --> "dimensions" newtaxa ";"
    | .

format_unaligned --> "format" format_unaligned_list ";"
    | .

format_unaligned_list --> format_unaligned_item format_unaligned_list
    | .

/* One format setting per item; format_unaligned_list supplies repetition. */
format_unaligned_item --> datatype_unaligned
    | "respectcase"
    | missing
    | symbols
    | equate
    | labels .

datatype_unaligned --> "datatype" "=" datatype_unaligned_options .
datatype_unaligned_options --> "standard"
    | "dna"
    | "rna"
    | "nucleotide"
    | "protein" .

/*----------------------------------------------------------------------*/
/* DISTANCES */
/*----------------------------------------------------------------------*/

block_distances --> "distances" ";"
    dimensions_distances format_distances taxlabels_optional
    "matrix" matrix_data ";" .

dimensions_distances --> "dimensions" newtaxa nchar ";"
    | .

nchar --> "nchar" "=" positive_integer
    | .

format_distances --> "format" format_distances_list ";"
    | .

format_distances_list --> format_distances_item format_distances_list
    | .

format_distances_item -->
    | triangle
    | diagonal
    | labels
    | missing
    | "interleave" .

triangle --> "triangle" "=" triangle_option .

triangle_option --> "lower"
    | "upper"
    | "both" .

diagonal --> "diagonal"
    | "nodiagonal" .

/*----------------------------------------------------------------------*/
/* CODONS */
/*----------------------------------------------------------------------*/

/* Each of the three commands is optional (each has an empty alternative). */
block_codons --> "codons" ";" codonposset geneticcode codeset .

codonposset --> "codonposset" star identifier codonposet_rest ";"
    | .

/* Either "=" followed directly by the standard form, or a parenthesised
   format keyword that selects the standard or the vector form. */
codonposet_rest --> "=" codonposet_standard
    | "(" codonposet_format .

codonposet_format --> "standard" ")" "=" codonposet_standard
    | "vector" ")" "=" definition_tokens_vector .

codonposet_standard --> "n" ":" set "," "1" ":" set "," "2" ":" set "," "3" ":" set .

geneticcode --> "geneticcode" identifier geneticcode_option_list "=" geneticcode_description ";"
    | .

/* Optional parenthesised list of one or more options. */
geneticcode_option_list --> "(" geneticcode_option geneticcode_option_sequence ")"
    | .

geneticcode_option_sequence --> geneticcode_option geneticcode_option_sequence
    | .

geneticcode_option --> "codeorder" "=" positive_integer
    | "nucorder" "=" identifier
    | tokens
    | "extensions" "=" "\"" identifier_list "\"" .

geneticcode_description --> geneticcode_list "\n" geneticcode_list
    | ";" .

geneticcode_list --> geneticcode_symbol geneticcode_list
    | .
codeset --> "codeset" star identifier codeset_type "=" codename_list ";"
    | .

codeset_type --> "(" codeset_type_name ")"
    | .

codeset_type_name --> "characters"
    | "unaligned"
    | "taxa" .

codename_list --> codename codename_list
    | .

codename --> identifier ":" set .

/*----------------------------------------------------------------------*/
/* SETS */
/*----------------------------------------------------------------------*/

block_sets --> "sets" ";" set_command_list .

set_command_list --> set_command set_command_list
    | .

set_command -->
    | charset
    | stateset
    | changeset
    | taxset
    | treeset
    | charpartition
    | taxpartition
    | treepartition .

stateset --> "stateset" identifier block_set_definition ";" .

changeset --> "changeset" identifier "=" change_set ";" .

treeset --> "treeset" identifier block_set_definition ";" .

charpartition --> "charpartition" identifier definition ";" .

taxpartition --> "taxpartition" identifier definition ";" .

treepartition --> "treepartition" identifier definition ";" .

/* Zero or more "item direction item" triples. */
change_set --> change_item changeset_direction change_item change_set
    | .

/* NOTE(review): state_set has no visible definition in this grammar;
   confirm which rule is intended. */
change_item --> identifier
    | state_symbol
    | "(" state_set ")" .

/* The arrows "<->" and "->", written as separate single-character tokens. */
changeset_direction --> "<" "-" ">"
    | "-" ">" .

/*----------------------------------------------------------------------*/
/* ASSUMPTIONS */
/*----------------------------------------------------------------------*/

block_assumptions --> "assumptions" ";" assumptions_command_list .

assumptions_command_list --> assumptions_command assumptions_command_list
    | .

assumptions_command --> options
    | usertype
    | typeset
    | wtset
    | exset
    | ancstates
    | taxset
    | charset .

options --> "options" options_command_list ";" .

/* The closing ")" of the type keyword is consumed by usertype_definition. */
usertype --> "usertype" identifier "(" usertype_definition ";" .

typeset --> "typeset" star identifier definition_tokens ";" .

wtset --> "wtset" star identifier definition ";" .

exset --> "exset" star identifier definition_notokens ";" .

ancstates --> "ancstates" star identifier definition_standard ";" .
options_command_list --> options_command options_command_list
    | .

options_command --> deftype
    | polytcount
    | gapmode .

deftype --> "deftype" "=" identifier .

polytcount --> "polytcount" "=" polytcount_type .

polytcount_type --> "minsteps"
    | "maxsteps" .

/* Continues after the "(" consumed by usertype: the type keyword, its
   closing ")", "=" and the type-specific body. */
usertype_definition --> "stepmatrix" ")" "=" positive_integer "\n" state_word_list stepmatrix_row_list
    | "cstree" ")" "=" cstree .

/* Each step-matrix row is introduced by a newline token. */
stepmatrix_row_list --> "\n" stepmatrix_row stepmatrix_row_list
    | .

stepmatrix_row --> stepmatrix_item stepmatrix_row
    | .

stepmatrix_item --> numeric
    | "."
    | "i" .

/* Parenthesised, comma-separated nesting with state symbols at the leaves. */
cstree --> "(" cstree cstree_list ")" state_symbol_optional
    | state_symbol .

cstree_list --> "," cstree cstree_list
    | .

/*----------------------------------------------------------------------*/
/* TREES */
/*----------------------------------------------------------------------*/

block_trees --> "trees" ";" translate tree .

/* Optional comma-separated list of reference pairs. */
translate --> "translate" reference reference translate_list ";"
    | .

translate_list --> "," reference reference translate_list
    | .

/* Zero or more tree/utree commands; tree_rest recurses back into tree. */
tree --> "tree" tree_rest
    | "utree" tree_rest
    | .

tree_rest --> star identifier "=" root tree_definition ";" tree .

/* Parenthesised, comma-separated nesting with labels at the leaves. */
tree_definition --> "(" tree_definition tree_list ")" tree_label_optional
    | tree_label .

root -->
    | "[&R]"
    | "[&U]"
    | .

tree_list --> "," tree_definition tree_list
    | .

tree_label --> identifier length
    | numeric length .

tree_label_optional --> tree_label
    | .

length --> ":" numeric
    | .

/*----------------------------------------------------------------------*/
/* NOTES */
/*----------------------------------------------------------------------*/

block_notes --> "notes" ";" block_notes_command_list .

block_notes_command_list --> block_notes_command block_notes_command_list
    | .

block_notes_command --> text
    | picture .

text --> "text" text_options "text" "=" identifier ";"
    | .

/* Fixed order; every component has an empty alternative. */
text_options --> taxon character state tree_set source .

taxon --> "taxon" "=" set_description
    | .

character --> "character" "=" set_description
    | .

state --> "state" "=" set_description
    | .
tree_set --> "tree" "=" set_description
    | .

set_description --> reference
    | "(" set ")" .

source --> "source" "=" source_option
    | .

source_option --> "inline"
    | "file"
    | "resource" .

picture --> "picture" picture_options "source" "=" source_option "picture" "=" identifier ";"
    | .

picture_options --> taxon
    | character
    | state
    | tree_set
    | format
    | encode .

format --> "format" "=" format_option
    | .

encode --> "encode" "=" encode_option
    | .

format_option --> "pict"
    | "tiff"
    | "eps"
    | "jpeg"
    | "gif" .

encode_option --> "none"
    | "uuencode"
    | "binhex" .

/*----------------------------------------------------------------------*/
/* COMMENTS */
/*----------------------------------------------------------------------*/

comment --> command_comment
    | text_comment .

text_comment --> "[" text_comment_content "]"
    | .

text_comment_content --> text_comment
    | no_bracket_token text_comment_content
    | .

no_bracket_token --> Any token except "]" .

/* Bracketed backslash commands: [\i], [\b], [\u] and [\p]. */
command_comment -->
    | "[" "\\" "i" "]"
    | "[" "\\" "b" "]"
    | "[" "\\" "u" "]"
    | "[" "\\" "p" "]" .

/*----------------------------------------------------------------------*/
/* COMMON */
/*----------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
   Formatted object definition rules used by object definition commands
   ---------------------------------------------------------------------------- */

/* A definition is either "=" followed directly by the body, or "(" followed
   by format keywords that select the form of the body. */
definition --> "(" format_tokens_standard
    | "=" definition_tokens_standard .

definition_tokens --> "(" definition_tokens_rest
    | "=" definition_tokens_standard .

definition_tokens_rest --> "standard" ")" "=" definition_tokens_standard
    | "vector" ")" "=" definition_tokens_vector .

definition_notokens --> "(" definition_notokens_rest
    | "=" definition_notokens_standard .

definition_notokens_rest --> "standard" ")" "=" definition_notokens_standard
    | "vector" ")" "=" definition_notokens_vector .
definition_standard --> "(" format_unknown_standard
    | "=" definition_unknown_standard .

/* The format_<t>_<f> rules below consume format keywords one at a time.
   <t> (unknown / tokens / notokens) and <f> (standard / vector) record the
   most recent keyword of each kind; each keyword moves to the rule that
   reflects it, and ")" "=" selects the matching definition_<t>_<f> body. */

format_unknown_standard --> "tokens" format_tokens_standard
    | "notokens" format_notokens_standard
    | "standard" format_unknown_standard
    | "vector" format_unknown_vector
    | ")" "=" definition_unknown_standard .

format_unknown_vector --> "tokens" format_tokens_vector
    | "notokens" format_notokens_vector
    | "standard" format_unknown_standard
    | "vector" format_unknown_vector
    | ")" "=" definition_unknown_vector .

format_tokens_standard --> "tokens" format_tokens_standard
    | "notokens" format_notokens_standard
    | "standard" format_tokens_standard
    | "vector" format_tokens_vector
    | ")" "=" definition_tokens_standard .

format_notokens_standard --> "tokens" format_tokens_standard
    | "notokens" format_notokens_standard
    | "standard" format_notokens_standard
    | "vector" format_notokens_vector
    | ")" "=" definition_notokens_standard .

format_tokens_vector --> "tokens" format_tokens_vector
    | "notokens" format_notokens_vector
    | "standard" format_tokens_standard
    | "vector" format_tokens_vector
    | ")" "=" definition_tokens_vector .

format_notokens_vector --> "tokens" format_tokens_vector
    | "notokens" format_notokens_vector
    | "standard" format_notokens_standard
    | "vector" format_notokens_vector
    | ")" "=" definition_notokens_vector .

/* One or more comma-separated "reference : set" entries. */
definition_tokens_standard --> reference ":" set token_standard_list .

definition_notokens_standard --> set .

definition_tokens_vector --> reference_list .

definition_notokens_vector --> nonsemicolon_word_list .

definition_unknown_standard --> nonsemicolon_word_list .

definition_unknown_vector --> nonsemicolon_word_list .

token_standard_list --> "," reference ":" set token_standard_list
    | .

/* NOTE(review): nonsemicolon_word has no visible definition in this
   grammar; confirm where it is specified. */
nonsemicolon_word_list --> nonsemicolon_word nonsemicolon_word_list
    | .
/* ----------------------------------------------------------------------------
   Matrix definition rules
   ---------------------------------------------------------------------------- */

/* Zero or more rows separated by newline tokens; each row is an identifier
   followed by its entries. */
matrix_data --> identifier matrix_entry_list matrix_data_rest
    | .

matrix_data_rest --> "\n" matrix_data
    | .

/* An entry is a plain state word, or a non-empty group of composed words
   enclosed in "(" ")" or "{" "}". */
matrix_entry_list --> state_word matrix_entry_list
    | "(" state_composed_word state_composed_list ")" matrix_entry_list
    | "{" state_composed_word state_composed_list "}" matrix_entry_list
    | .

state_composed_word --> state_complex_word
    | state_complex_word ":" state_complex_word .

state_complex_word --> positive_integer
    | state_word .

state_composed_list --> state_composed_word state_composed_list
    | .

/* ----------------------------------------------------------------------------
   Equate definition rules
   ---------------------------------------------------------------------------- */

/* One or more equate elements written between literal double quotes. */
equate --> "equate" "=" "\"" equate_element equate_list "\"" .

equate_list --> equate_element equate_list
    | .

equate_element --> equate_symbol "=" equate_element_definition .

equate_element_definition --> "(" equate_word_list ")"
    | equate_symbol .

equate_word_list --> equate_word equate_word_list
    | .

/* ----------------------------------------------------------------------------
   Set definition rules
   ---------------------------------------------------------------------------- */

set --> set_item set
    | .

set_item --> "all" set_period
    | "remainder"
    | reference set_item_range .

/* Optional "-" range whose upper end is "." or a reference, followed by an
   optional period. */
set_item_range --> "-" set_item_range_identifier set_period
    | .

/* A backslash followed by a positive integer. */
set_period --> "\\" positive_integer
    | .

set_item_range_identifier --> "."
    | reference .

/* ----------------------------------------------------------------------------
   Individual rules used by various commands or blocks
   ---------------------------------------------------------------------------- */

block_set_definition --> "=" set
    | "(" block_set_format .

block_set_format --> "standard" ")" "=" set
    | "vector" ")" "=" binary_word_list .
charset --> "charset" identifier block_set_definition ";" .

gapmode --> "gapmode" "=" gapmode_type .

gapmode_type --> "missing"
    | "newstate" .

identifier_list --> identifier identifier_list
    | .

labels --> "labels"
    | "nolabels" .

missing --> "missing" "=" character_symbol .

/* ntax, optionally preceded by the "newtaxa" keyword. */
newtaxa --> "newtaxa" ntax
    | ntax .

newtaxa_optional --> newtaxa
    | .

ntax --> "ntax" "=" positive_integer .

reference --> positive_integer
    | identifier .

reference_list --> reference reference_list
    | .

star --> "*"
    | .

state_symbol_optional --> state_symbol
    | .

/* NOTE(review): state_symbol_list has no visible definition in this
   grammar; confirm where it is specified. */
symbols --> "symbols" "=" "\"" state_symbol_list "\"" .

state_word_list --> state_word state_word_list
    | .

/* One or more identifiers terminated by ";". */
taxlabels --> "taxlabels" identifier identifier_list ";" .

taxlabels_optional --> taxlabels
    | .

taxset --> "taxset" identifier block_set_definition ";" .

tokens --> "tokens"
    | "notokens" .

/* ----------------------------------------------------------------------------
   Special token matching rules
   ---------------------------------------------------------------------------- */

equate_word --> A token composed of equate symbols

state_word --> A token composed of state symbols

binary_word_list --> List of words where each word is composed only of the
    digits 0 and 1.

identifier --> A token satisfying the regular expression [_\w]+[\d\w\._]*.
    Note that a single _ is considered a valid identifier. In most contexts
    a single _ means a "don't care identifier", similar to the meaning of _
    in Prolog.

numeric --> A number in any format, integer or real.

positive_integer --> An integer greater than 0. Must satisfy the regular
    expression [1-9][\d]*

equate_symbol --> Any character except any of the following:
        \n()[]{}/\,;:*`'"<>^
    or any character currently defined as missing, gap, matchchar, or symbol.
character_symbol --> Any character except any of the following:
        \n\s()[]{}<>/\,;:=*^'"

matchchar_symbol --> Any symbol except any of the following:
        \n\s()[]{}/\,;:=*'"`<>^

state_symbol --> Any symbol except any of the following:
        \n()[]{}<>/\,;:=*'"`~
    Space symbols in a state_symbol_string are ignored. For example
    " 1 2 3 " is equivalent to "123"

geneticcode_symbol --> An identifier or a number

/*----------------------------------------------------------------------*/
/* ID */
/*----------------------------------------------------------------------*/

/* This is a context-free grammar describing an item in NEXUS. An item is a
   syntactic element. Intuitively, an item can be a single token, a single
   comment, or a combination of tokens and comments. The term "item" arose
   because, in NEXUS, a single syntactic element can contain comments
   anywhere: at the beginning, in the middle, or at the end. Moreover,
   comments can be nested. Therefore a simple deterministic finite automaton
   is not enough to recognize a syntactic element of NEXUS. That is why we
   use the following grammar to recognize items in a NEXUS file. */

item_list --> item item_list
    | end_of_file .

item --> comment comment_rest
    | token token_rest .

comment_rest -->
    | comment comment_rest
    | token comment_rest
    | space .

token_rest -->
    | comment comment_rest
    | space .

comment --> "[" comment_token_list "]" .

comment_token_list --> comment_token comment_token_list
    | comment
    | .

comment_token --> Any token except the square brackets, that is, except "[" or "]".

space --> Any sequence of characters defined as a BLANK CHARACTER in NEXUS.

end_of_file --> The end of file token