Wednesday, June 23, 2010

Flex-based lexical scanner for ParaSail

This entry and the next contain Flex and Yacc grammars for the current definition of ParaSail.  They are actually written for aflex and ayacc, the Ada equivalents of flex and yacc produced many years ago by the Arcadia project, so the commenting conventions and the embedded code are Ada rather than some other language.  Hopefully you can translate them fairly easily to your language of choice.

This entry contains the "aflex"-compatible grammar; the next will contain the "ayacc"-compatible grammar.


-- Flex-compatible grammar for ParaSail
%START IDENT Z

  -- Start conditions, as used by the rules of this grammar: IDENT is
  -- entered after an identifier, an enumeration literal, a right
  -- parenthesis or a right bracket, and is the only state in which an
  -- apostrophe is the PRIME delimiter.  Z is entered after every other
  -- token, and is the only state in which a character literal is
  -- recognized.

  -- String and character literals.  A backslash starts an escape sequence
  -- and always consumes the character that follows it, so an escaped
  -- quote does not terminate the literal.  The ordinary-character class
  -- therefore excludes the backslash, and the escaped character is written
  -- as an unbracketed dot: a dot inside brackets would match only a
  -- literal period.
STRING_LITERAL  (\"([^\"\\]|[\\].)*\")

CHAR_LITERAL    (\'([^\'\\]|[\\].)\')

IDENTIFIER        [a-zA-Z]([_]?[a-zA-Z0-9])*

  -- IDENTIFIER (above) is a letter followed by letters and digits, each of
  -- which may be preceded by a single underscore.  An identifier therefore
  -- cannot start or end with an underscore, nor contain two in a row.
  --
  -- The following are used to match all numeric literals.
  -- Note that double underscores are rejected.
  -- DIGIT_SEQUENCE and HEX_SEQUENCE use the same shape as IDENTIFIER: one
  -- digit, then further digits each optionally preceded by one underscore.
  -- EXPONENT is the letter E in either case, an optional sign, and a
  -- decimal digit sequence.
DIGIT_SEQUENCE    [0-9]([_]?[0-9])*
HEX_SEQUENCE      [0-9a-fA-F]([_]?[0-9a-fA-F])*
EXPONENT          [Ee][-+]?{DIGIT_SEQUENCE}

%%

  -- ParaSail reserved words
"abs"        {ECHO_L; ENTER(Z); return (ABS_kw);}
"abstract"    {ECHO_L; ENTER(Z); return (ABSTRACT_kw);}
"all"        {ECHO_L; ENTER(Z); return (ALL_kw);}
"and"        {ECHO_L; ENTER(Z); return (AND_kw);}
"block"        {ECHO_L; ENTER(Z); return (BLOCK_kw);}
"case"        {ECHO_L; ENTER(Z); return (CASE_kw);}
"class"        {ECHO_L; ENTER(Z); return (CLASS_kw);}
"concurrent"    {ECHO_L; ENTER(Z); return (CONCURRENT_kw);}
"const"        {ECHO_L; ENTER(Z); return (CONST_kw);}
"continue"    {ECHO_L; ENTER(Z); return (CONTINUE_kw);}
"each"        {ECHO_L; ENTER(Z); return (EACH_kw);}
"else"        {ECHO_L; ENTER(Z); return (ELSE_kw);}
"elsif"        {ECHO_L; ENTER(Z); return (ELSIF_kw);}
"end"        {ECHO_L; ENTER(Z); return (END_kw);}
"exit"        {ECHO_L; ENTER(Z); return (EXIT_kw);}
"exports"    {ECHO_L; ENTER(Z); return (EXPORTS_kw);}
"extends"    {ECHO_L; ENTER(Z); return (EXTENDS_kw);}
"for"        {ECHO_L; ENTER(Z); return (FOR_kw);}
"forward"    {ECHO_L; ENTER(Z); return (FORWARD_kw);}
"function"    {ECHO_L; ENTER(Z); return (FUNCTION_kw);}
"if"        {ECHO_L; ENTER(Z); return (IF_kw);}
"import"    {ECHO_L; ENTER(Z); return (IMPORT_kw);}
"in"        {ECHO_L; ENTER(Z); return (IN_kw);}
"interface"    {ECHO_L; ENTER(Z); return (INTERFACE_kw);}
"is"        {ECHO_L; ENTER(Z); return (IS_kw);}
"locked"    {ECHO_L; ENTER(Z); return (LOCKED_kw);}
"loop"        {ECHO_L; ENTER(Z); return (LOOP_kw);}
"mod"        {ECHO_L; ENTER(Z); return (MOD_kw);}
"mutable"    {ECHO_L; ENTER(Z); return (MUTABLE_kw);}
"new"        {ECHO_L; ENTER(Z); return (NEW_kw);}
"not"        {ECHO_L; ENTER(Z); return (NOT_kw);}
"null"        {ECHO_L; ENTER(Z); return (NULL_kw);}
"of"        {ECHO_L; ENTER(Z); return (OF_kw);}
"operator"    {ECHO_L; ENTER(Z); return (OPERATOR_kw);}
"optional"    {ECHO_L; ENTER(Z); return (OPTIONAL_kw);}
"or"        {ECHO_L; ENTER(Z); return (OR_kw);}
"procedure"    {ECHO_L; ENTER(Z); return (PROCEDURE_kw);}
"queued"    {ECHO_L; ENTER(Z); return (QUEUED_kw);}
"ref"        {ECHO_L; ENTER(Z); return (REF_kw);}
"rem"        {ECHO_L; ENTER(Z); return (REM_kw);}
"return"    {ECHO_L; ENTER(Z); return (RETURN_kw);}
"reverse"    {ECHO_L; ENTER(Z); return (REVERSE_kw);}
"select"    {ECHO_L; ENTER(Z); return (SELECT_kw);}
"some"        {ECHO_L; ENTER(Z); return (SOME_kw);}
"then"        {ECHO_L; ENTER(Z); return (THEN_kw);}
"type"        {ECHO_L; ENTER(Z); return (TYPE_kw);}
"var"        {ECHO_L; ENTER(Z); return (VAR_kw);}
"while"        {ECHO_L; ENTER(Z); return (WHILE_kw);}
"with"        {ECHO_L; ENTER(Z); return (WITH_kw);}
"xor"        {ECHO_L; ENTER(Z); return (XOR_kw);}

  -- Match all the compound ParaSail delimiters. 
"=?"        {ECHO_L; ENTER(Z); return(COMPARE);}
"=="        {ECHO_L; ENTER(Z); return(EQ);}
"!="        {ECHO_L; ENTER(Z); return(NEQ);}
">="        {ECHO_L; ENTER(Z); return(GEQ);}
"<="        {ECHO_L; ENTER(Z); return(LEQ);}
"**"        {ECHO_L; ENTER(Z); return(POWER);}
":="        {ECHO_L; ENTER(Z); return(ASSIGN);}
":=:"        {ECHO_L; ENTER(Z); return(SWAP);}
".."        {ECHO_L; ENTER(Z); return(DOT_DOT);}
"::"        {ECHO_L; ENTER(Z); return(DOUBLE_COLON);}
"[["        {ECHO_L; ENTER(Z); return(DOUBLE_LEFT_BRACKET);}
"]]"        {ECHO_L; ENTER(Z); return(DOUBLE_RIGHT_BRACKET);}
"=>"        {ECHO_L; ENTER(Z); return(REFERS_TO);}
"->"        {ECHO_L; ENTER(Z); return(GIVES);}
"==>"        {ECHO_L; ENTER(Z); return(IMPLIES);}
";;"        {ECHO_L; ENTER(Z); return(SEQUENCE);}
"||"        {ECHO_L; ENTER(Z); return(PARALLEL);}

  -- Match all the ParaSail single-character delimiters.
<IDENT>\'  {ECHO_L; ENTER(Z);     return(PRIME);}
"("        {ECHO_L; ENTER(Z);     return('(');}
")"        {ECHO_L; ENTER(IDENT); return(')');}
"["        {ECHO_L; ENTER(Z);     return('[');}
"]"        {ECHO_L; ENTER(IDENT); return(']');}
"<"        {ECHO_L; ENTER(Z);     return('<');}
">"        {ECHO_L; ENTER(Z);     return('>');}
"{"       {ECHO_L; ENTER(Z);      return('{');}
"}"       {ECHO_L; ENTER(Z);      return('}');}
"*"        {ECHO_L; ENTER(Z);     return('*');}
"+"        {ECHO_L; ENTER(Z);     return('+');}
","        {ECHO_L; ENTER(Z);     return(',');}
"-"        {ECHO_L; ENTER(Z);     return('-');}
"."        {ECHO_L; ENTER(Z);     return('.');}
"/"        {ECHO_L; ENTER(Z);     return('/');}
":"        {ECHO_L; ENTER(Z);     return(':');}
";"        {ECHO_L; ENTER(Z);     return(';');}
"|"        {ECHO_L; ENTER(Z);     return('|');}
"?"        {ECHO_L; ENTER(Z);     return('?');}
"~"        {ECHO_L; ENTER(Z);     return('~');}

  -- The following is used to match all valid ParaSail identifiers
  -- except reserved words. Note that leading digits and underscores
  -- are not allowed and that double underscores are not allowed.

{IDENTIFIER}       {ECHO_L; ENTER(IDENT);return(Identifier);}

  -- Enumeration literals
[#]{IDENTIFIER}    {ECHO_L; ENTER(IDENT);return(Enum_Literal);}

  -- Decimal numeric literals
{DIGIT_SEQUENCE}{EXPONENT}?  {
                  ECHO_L; ENTER(Z);
                  return(Integer_Literal);}

{DIGIT_SEQUENCE}[.]{DIGIT_SEQUENCE}{EXPONENT}?  {
                  ECHO_L; ENTER(Z);
                  return(Real_Literal);}

  -- Based numeric literals.

{DIGIT_SEQUENCE}[#]{HEX_SEQUENCE}[#]{EXPONENT}? {
                  ECHO_L; ENTER(Z);
                  return(Integer_Literal);}

{DIGIT_SEQUENCE}[#]{HEX_SEQUENCE}[.]{HEX_SEQUENCE}[#]{EXPONENT}? {
                  ECHO_L; ENTER(Z);
                  return(Real_Literal);}

"0"[xX]{HEX_SEQUENCE}        {ECHO_L; ENTER(Z); return(Integer_Literal);}
"0"[bB]{DIGIT_SEQUENCE}        {ECHO_L; ENTER(Z); return(Integer_Literal);}

  -- Match all valid character literals.
<Z>{CHAR_LITERAL}            {ECHO_L; ENTER(Z); return(Char_Literal);}

  -- Match all valid string literals.
{STRING_LITERAL}                {ECHO_L; ENTER(Z); return(String_Literal);}

"//".*    {ECHO_L;}           -- ignore comments to end-of-line

"--".*    {ECHO_L;}           -- ignore comments to end-of-line

  -- The following matches all whitespace.  Except for vertical tabs.  AFLEX,
  -- ALEX and LEX do not support vertical tabs.
[ \r\t\f]+ {ECHO_L;}        -- ignore spaces,Carriage returns,tabs,form feeds

  -- The following matches all new lines.

[\n]       {ECHO_L; linenum;}

  -- The following matches everything else and prints an error message
  -- indicating that something unexpected was found.

.          {ECHO_L; 
            text_io.put_line("?? lexical error '" & 
          parasail_lex_dfa.yytext & "' ??");
        num_errors := num_errors + 1;}

%%
-- Context for the scanner package.  parasail_tokens is the only unit named
-- in a with clause here; presumably it declares the token type and the
-- token names returned by the rules (confirm against the ayacc output).
with parasail_tokens; 
use  parasail_tokens;
use text_io;

-- Hand-written part of the ParaSail scanner: the counters and helper
-- procedures that the rule actions refer to, plus the yylex entry point.
package parasail_lex is
  
  -- Line counter; linenum prints its current value and then adds one.
  lines      : positive := 1;
  -- Number of lexical errors; incremented by the catch-all error rule.
  num_errors     : natural := 0;
  -- NOTE(review): not referenced anywhere in this file; presumably a
  -- debugging switch read by a driver -- confirm.
  Trace          : Boolean := False;

  -- Writes the text of the current match (yytext) with text_io.put.
  procedure ECHO_L; --local version_of define_string.
  -- Prints the current value of lines, padded with blanks to five columns
  -- when shorter, and then increments lines.
  procedure linenum; 

  -- Scanner entry point returning the next token.  Its body is not written
  -- out in this file; presumably aflex generates it at the ## marker in
  -- the package body (confirm against the aflex manual).
  function yylex return token;

end parasail_lex;

package body parasail_lex is

  -- Echo the text of the token just matched (yytext) to the text_io
  -- default output.  Every rule action calls this first, so the scanner
  -- reproduces its input as it goes.
  procedure ECHO_L is
  begin
    text_io.put (yytext);
  end ECHO_L;

  -- Print the line count as it stood on entry, pad it with blanks to five
  -- columns when it is shorter than that, and advance the count by one.
  procedure linenum is
    -- Both values are fixed before the counter is advanced below.
    number_text : constant string  := integer'image (lines);
    padding     : constant integer := 5 - number_text'length;
  begin
    lines := lines + 1;
    text_io.put (number_text);
    for blank in 1 .. padding loop
      text_io.put (" ");
    end loop;
  end linenum;

  -- The marker below must stay on a line of its own; presumably aflex
  -- places the generated yylex function here (confirm against the aflex
  -- manual).
##

end parasail_lex;

No comments:

Post a Comment