### non-standard lexical rules ###

$ALF_non_whitespace = '\S+';

$ALF_any_string_value = join('$|^',
	$ALF_quoted_string,
	$ALF_non_whitespace,
);

$ALF_liberty_identifier = '[A-Za-z0-9\$#][A-Za-z0-9_\$#\.]*';
$ALF_liberty_string_value = join('$|^',
	$ALF_quoted_string,
	$ALF_liberty_identifier,
);

$ALF_unix_file_name = '[^\s\.\/]+';
$ALF_unix_file_relative = '\.{1,2}';
$ALF_unix_file_id = "$ALF_unix_file_name|$ALF_unix_file_relative";
$ALF_unix_file_separator = '\/+';
$ALF_unix_file_path = "($ALF_unix_file_separator)*(($ALF_unix_file_id)($ALF_unix_file_separator($ALF_unix_file_id)))*";

### Lexical parser ###

sub ALF_InitLexical {
	@ALF_ParserData = ();	### contains a representation of parsed lexical data
	@ALF_ParserIndex = ();	### contains a cross-reference between lexical token and line number in input file
	%ALF_FirstLine = ();
	$ALF_CurrentDataBase = undef;
	$ALF_ParserState='token';	### contains a control value for the lexical parser
	%ALF_Expression = ();
	%ALF_ExpressionRule = ();
	%ALF_ExpressionToken = ();
}

sub ALF_ReadLexical {	local(@newdata) = @_;	### populates @ALF_ParserData with the lexical representation of @newdata. Returns true
	local $oldtoken = $#ALF_ParserData;
### eliminate trailing '\'
	local $last = $newdata[$#newdata];
	if ($last =~ /([^\\]*)\\/) {
		$last = $1;
		$newdata[$#newdata] = length($last)? $last : undef;
	}
	foreach $newdata (@newdata) {
		local ($newdata1,$newdata2);
### inline comment continues until EOL
		if ($ALF_ParserState eq 'inline_comment') {
### block comment continues until */
		} elsif ($ALF_ParserState eq 'block_comment') {
			$ALF_ParserState = 'token' if ($newdata =~ /\*\/$/);
### quoted string continues until "
		} elsif ($ALF_ParserState eq 'quoted_string') {
			if ($newdata =~ /([^\42]*)\42([^\42]*)$/) {
				$ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData]." $1\42";
				$newdata2 = $2;
				$ALF_ParserState = 'token';
			} else {
				$ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData]." $newdata";
			}
### index continues until ]
		} elsif ($ALF_ParserState eq 'index') {
			$ALF_ParserData[$#ALF_ParserData] = $ALF_ParserData[$#ALF_ParserData].$newdata;
			$ALF_ParserState = 'token' if ($newdata =~ /\]$/);
### inline comment starts
		} elsif ($newdata =~ /^\/\//) {
			$ALF_ParserState = 'inline_comment';
### block comment starts
		} elsif ($newdata =~ /^\/\*/) {
			$ALF_ParserState = ($newdata =~ /\*\/$/)? 'token' : 'block_comment';
### quoted string starts
		} elsif ($newdata =~ /^([^\42]*)\42(.*)/) {
			$newdata1 = $1;
			local $moredata = $2;
			if ($moredata =~ /([^\42]*)\42([^\42]*)$/) {
				$ALF_ParserData[$#ALF_ParserData+1] = "\42$1\42";
				$newdata2 = $2;
				$ALF_ParserState = 'token';
			} else {
				$ALF_ParserData[$#ALF_ParserData+1] = "\42$moredata";
				$ALF_ParserState = 'quoted_string';
			}
### index starts
		} elsif ($newdata =~ /^\[/) {
			$ALF_ParserData[$#ALF_ParserData+1] = $newdata;
			$ALF_ParserState = ($newdata =~ /\]$/)? 'token' : 'index';
### other 
		} else {
			foreach $split (split(/($ALF_delimiter|$ALF_meta_operator)/,$newdata)) {
	### number
				if (&ALF_LexicalMatch('number',$split)) {
					$ALF_ParserData[$#ALF_ParserData+1] = $split;
	### escaped identifier
				} elsif (&ALF_LexicalMatch('escaped_identifier',$split)) {
					$ALF_ParserData[$#ALF_ParserData+1] = $split;
	### placeholder identifier
				} elsif (&ALF_LexicalMatch('placeholder_identifier',$split)) {
					$ALF_ParserData[$#ALF_ParserData+1] = $split;
	### indexed identifier
				} elsif (&ALF_LexicalMatch('indexed_identifier',$split)) {
					$ALF_ParserData[$#ALF_ParserData+1] = $split;
	### everything else
				} else {
					foreach $split2 (split(/($ALF_operator)/,$split)) {
						$ALF_ParserData[$#ALF_ParserData+1] = $split2 if length($split2);
					}
				}
			}
		}
### leftover before quoted string
		if (length($newdata1)) {
			local $savepointer = $#ALF_ParserData;
			local $savedata = $ALF_ParserData[$savepointer];
			foreach $split (split(/($ALF_delimiter|$ALF_operator)/,$newdata1)) {
				if (length($split)) {
					$ALF_ParserData[$savepointer] = $split;
					$savepointer += 1;
				}
			}
			$ALF_ParserData[$savepointer] = $savedata;
		}
### leftover after quoted string
		if (defined($newdata2)) {
			foreach $split (split(/($ALF_delimiter|$ALF_operator)/,$newdata2)) {
				$ALF_ParserData[$#ALF_ParserData+1] = $split if length($split);
			}
		}
		print("# ALF Lexical Info:\tparser state = $ALF_ParserState\tparser data = $newdata\n") if ($ALF_Debug > 1);
	}
	local $newtoken = $#ALF_ParserData;
	$ALF_ParserIndex[$#ALF_ParserIndex+1] = $newtoken;
	if ($ALF_Debug) {
		local @data;
		local $n = $newtoken - $oldtoken;
		for $i (1..$n) { $data[$#data+1] = $ALF_ParserData[$i+$oldtoken]; }
		local $text = ($n == 1)? "1 token" : "$n tokens";
		print("# ALF Lexical Info:\tidentified $text in line ",&ALF_CurrentLineNumber($newtoken)," (",$oldtoken+1,"-$newtoken):\t@data\n");
	}
	$ALF_ParserState = 'token' if ($ALF_ParserState eq 'inline_comment');
	1;
}

sub ALF_CurrentLineNumber {	local($token) = @_;	### returns the line number where $ALF_ParserData[$token] appears
	local $line = 0;
	if ($token > $#ALF_ParserData) {
		$line = $#ALF_ParserIndex;
	} else {
		while($ALF_ParserIndex[$line] < $token) {
			$line = $line + 1;
		}
	}
	$line + 1 - $ALF_FirstLine{$ALF_CurrentDataBase};
}

sub ALF_LexicalMatch {	local($rule,$item) = @_;	### returns true if there is a lexical match between $item and the pattern embodied in $rule. Returns false otherwise
	local $rulepattern = eval('$ALF_'.$rule);
	($item =~ /^$rulepattern$/) && ($item eq $&); 
}

sub ALF_LexicalRule {	local($rule,$token,$forced) = @_;	### evaluates $ALF_ParserData[$token] against lexical $rule. Returns true if lexical match or emits error message if $forced
	local $rulepattern = eval('$ALF_'.$rule);
	local $match = ($ALF_ParserData[$token] =~ /^$rulepattern$/);
	print("# ALF Lexical Info:\tdata[$token](",$ALF_ParserData[$token],") matches rule $rule\n") if ($match && $ALF_Debug > 1);
	$match || $forced && &ALF_LexicalError($rule,$token);
}

sub ALF_LexicalError {	local($rule,$token) = @_;	### prints error message related to lexical $rule and $ALF_ParserData[$token]. Returns true
	local $line = &ALF_CurrentLineNumber($token);
	local $data = $ALF_ParserData[$token];
	print("# ALF Lexical Error:\t\47$data\47 in line $line does not match rule \47$rule\47\n");
	1;
}

sub ALF_LexicalItems {	local($rule,$token) = @_;	### increments $token while there is a lexical match between $rule and $ALF_ParserData[$token]. Returns token of last match, if any match. Otherwise returns token before initial $token. 
	local $rulepattern = eval('$ALF_'.$rule);
	while ($ALF_ParserData[$token] =~ /$rulepattern/ ) {
		$token = $token + 1;
	}
	$token-1;
}

1;