### non-standard lexical rules ### $ALF_non_whitespace = '\S+'; $ALF_any_string_value = join('$|^', $ALF_quoted_string, $ALF_non_whitespace, ); $ALF_liberty_identifier = '[A-Za-z0-9\$#][A-Za-z0-9_\$#\.]*'; $ALF_liberty_string_value = join('$|^', $ALF_quoted_string, $ALF_liberty_identifier, ); $ALF_unix_file_name = '[^\s\.\/]+'; $ALF_unix_file_relative = '\.{1,2}'; $ALF_unix_file_id = "$ALF_unix_file_name|$ALF_unix_file_relative"; $ALF_unix_file_separator = '\/+'; $ALF_unix_file_path = "($ALF_unix_file_separator)*(($ALF_unix_file_id)($ALF_unix_file_separator($ALF_unix_file_id)))*"; ### Lexical parser ### sub ALF_InitLexical { @ALF_ParserData = (); ### contains a representation of parsed lexical data @ALF_ParserIndex = (); ### contains a cross-reference between lexical token and line number in input file %ALF_FirstLine = (); $ALF_CurrentDataBase = undef; $ALF_ParserState='token'; ### contains a control value for the lexical parser %ALF_Expression = (); %ALF_ExpressionRule = (); %ALF_ExpressionToken = (); } sub ALF_ReadLexical { local(@newdata) = @_; ### populates @ALF_ParserData with the lexical representation of @newdata. Returns true local $oldtoken = $#ALF_ParserData; ### eliminate trailing '\' local $last = $newdata[$#newdata]; if ($last =~ /([^\\]*)\\/) { $last = $1; $newdata[$#newdata] = length($last)? $last : undef; } foreach $newdata (@newdata) { local ($newdata1,$newdata2); ### inline comment continues until EOL if ($ALF_ParserState eq 'inline_comment') { ### block comment continues until */ } elsif ($ALF_ParserState eq 'block_comment') { $ALF_ParserState = 'token' if ($newdata =~ /\*\/$/); ### quoted string continues until " } elsif ($ALF_ParserState eq 'quoted_string') { if ($newdata =~ /([^\42]*)\42([^\42]*)$/) { $ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData]." $1\42"; $newdata2 = $2; $ALF_ParserState = 'token'; } else { $ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData]." $newdata"; } ### index continues until ] } elsif ($ALF_ParserState eq 'index') { $ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData].$newdata; $ALF_ParserState = 'token' if ($newdata =~ /\]$/); ### inline comment starts } elsif ($newdata =~ /^\/\//) { $ALF_ParserState = 'inline_comment'; ### block comment starts } elsif ($newdata =~ /^\/\*/) { $ALF_ParserState = ($newdata =~ /\*\/$/)? 'token' : 'block_comment'; ### quoted string starts } elsif ($newdata =~ /^([^\42]*)\42(.*)/) { $newdata1 = $1; local $moredata = $2; if ($moredata =~ /([^\42]*)\42([^\42]*)$/) { $ALF_ParserData[$#ALF_ParserData+1] = "\42$1\42"; $newdata2 = $2; $ALF_ParserState = 'token'; } else { $ALF_ParserData[$#ALF_ParserData+1] = "\42$moredata"; $ALF_ParserState = 'quoted_string'; } ### index starts } elsif ($newdata =~ /^\[/) { $ALF_ParserData[$#ALF_ParserData+1] = $newdata; $ALF_ParserState = ($newdata =~ /\]$/)? 'token' : 'index'; ### other } else { foreach $split (split(/($ALF_delimiter|$ALF_meta_operator)/,$newdata)) { ### number if (&ALF_LexicalMatch('number',$split)) { $ALF_ParserData[$#ALF_ParserData+1] = $split; ### escaped identifier } elsif (&ALF_LexicalMatch('escaped_identifier',$split)) { $ALF_ParserData[$#ALF_ParserData+1] = $split; ### placeholder identifier } elsif (&ALF_LexicalMatch('placeholder_identifier',$split)) { $ALF_ParserData[$#ALF_ParserData+1] = $split; ### indexed identifier } elsif (&ALF_LexicalMatch('indexed_identifier',$split)) { $ALF_ParserData[$#ALF_ParserData+1] = $split; ### everything else } else { foreach $split2 (split(/($ALF_operator)/,$split)) { $ALF_ParserData[$#ALF_ParserData+1] = $split2 if length($split2); } } } } ### leftover before quoted string if (length($newdata1)) { local $savepointer = $#ALF_ParserData; local $savedata = $ALF_ParserData[$savepointer]; foreach $split (split(/($ALF_delimiter|$ALF_operator)/,$newdata1)) { if (length($split)) { $ALF_ParserData[$savepointer] = $split; $savepointer += 1; } } $ALF_ParserData[$savepointer] = $savedata; } ### leftover after quoted string if (defined($newdata2)) { foreach $split (split(/($ALF_delimiter|$ALF_operator)/,$newdata2)) { $ALF_ParserData[$#ALF_ParserData+1] = $split if length($split); } } print("# ALF Lexical Info:\tparser state = $ALF_ParserState\tparser data = $newdata\n") if ($ALF_Debug > 1); } local $newtoken = $#ALF_ParserData; $ALF_ParserIndex[$#ALF_ParserIndex+1] = $newtoken; if ($ALF_Debug) { local @data; local $n = $newtoken - $oldtoken; for $i (1..$n) { $data[$#data+1] = $ALF_ParserData[$i+$oldtoken]; } local $text = ($n == 1)? "1 token" : "$n tokens"; print("# ALF Lexical Info:\tidentified $text in line ",&ALF_CurrentLineNumber($newtoken)," (",$oldtoken+1,"-$newtoken):\t@data\n"); } $ALF_ParserState = 'token' if ($ALF_ParserState eq 'inline_comment'); 1; } sub ALF_CurrentLineNumber { local($token) = @_; ### returns the line number where $ALF_ParserData[$token] appears local $line = 0; if ($token > $#ALF_ParserData) { $line = $#ALF_ParserIndex; } else { while($ALF_ParserIndex[$line] < $token) { $line = $line + 1; } } $line + 1 - $ALF_FirstLine{$ALF_CurrentDataBase}; } sub ALF_LexicalMatch { local($rule,$item) = @_; ### returns true if there is a lexical match between $item and the pattern embodied in $rule. Returns false otherwise local $rulepattern = eval('$ALF_'.$rule); ($item =~ /^$rulepattern$/) && ($item eq $&); } sub ALF_LexicalRule { local($rule,$token,$forced) = @_; ### evaluates $ALF_ParserData[$token] against lexical $rule. Returns true if lexical match or emits error message if $forced local $rulepattern = eval('$ALF_'.$rule); local $match = ($ALF_ParserData[$token] =~ /^$rulepattern$/); print("# ALF Lexical Info:\tdata[$token](",$ALF_ParserData[$token],") matches rule $rule\n") if ($match && $ALF_Debug > 1); $match || $forced && &ALF_LexicalError($rule,$token); } sub ALF_LexicalError { local($rule,$token) = @_; ### prints error message related to lexical $rule and $ALF_ParserData[$token]. Returns true local $line = &ALF_CurrentLineNumber($token); local $data = $ALF_ParserData[$token]; print("# ALF Lexical Error:\t\47$data\47 in line $line does not match rule \47$rule\47\n"); 1; } sub ALF_LexicalItems { local($rule,$token) = @_; ### increments $token while there is a lexical match between $rule and $ALF_ParserData[$token]. Returns token of last match, if any match. Otherwise returns token before initial $token. local $rulepattern = eval('$ALF_'.$rule); while ($ALF_ParserData[$token] =~ /$rulepattern/ ) { $token = $token + 1; } $token-1; } 1;