# spellcheck.awk -- interactive spell checker
#
# AUTHOR: Dale Dougherty
#
# Usage: nawk -f spellcheck.awk [+dict] file 
# (Use spellcheck as name of shell program) 
# SPELLDICT = "dict" 
# SPELLFILE = "file"

# BEGIN actions perform the following tasks: 
#	1) process command line arguments
#	2) create temporary filenames
#	3) execute spell program to create wordlist file
#	4) display list of user responses

BEGIN { 
# Process command line arguments
# Must be at least two args -- nawk and filename
	if (ARGC > 1) {
	# if more than two args, second arg is dict 
		if (ARGC > 2) {
		# test to see if dict is specified with "+"  
		# and assign ARGV[1] to SPELLDICT
			if (ARGV[1] ~ /^\+.*/) 
				SPELLDICT = ARGV[1]
			else 
				SPELLDICT = "+" ARGV[1]
		# assign file ARGV[2] to SPELLFILE 
			SPELLFILE = ARGV[2]
		# delete args so awk does not open them as files
			delete ARGV[1]
			delete ARGV[2]
		}
	# not more than two args
		else {
		# assign file ARGV[1] to SPELLFILE 
			SPELLFILE = ARGV[1]
		# test to see if local dict file exists
			if (! system ("test -r dict")) {
			# if it does, ask if we should use it
				printf ("Use local dict file? (y/n)")	
				getline reply < "-"
			# if reply is yes, use "dict" 
				if (reply ~ /[yY](es)?/){
					SPELLDICT = "+dict"
				}
			}
		}
	} # end of processing args > 1 
	# if args not > 1, then print shell-command usage 
	else {
		print "Usage: spellcheck [+dict] file"
		exit 1
	}
# end of processing command line arguments

# create temporary file names, each begin with sp_
	wordlist = "sp_wordlist"
	spellsource = "sp_input"
	spellout = "sp_out"

# copy SPELLFILE to temporary input file
	system("cp " SPELLFILE " " spellsource)

# now run spell program; output sent to wordlist
	print "Running spell checker ..."
	if (SPELLDICT)
		SPELLCMD = "spell " SPELLDICT " "
	else
		SPELLCMD = "spell "
	system(SPELLCMD spellsource " > " wordlist )

# test wordlist to see if misspelled words turned up
	if ( system("test -s " wordlist ) ) {
	# if wordlist is empty, (or spell command failed), exit
		print "No misspelled words found."
		system("rm " spellsource " " wordlist)
		exit
	}	

# assign wordlist file to ARGV[1] so that awk will read it.	
	ARGV[1] = wordlist

# display list of user responses 
	responseList = "Responses: \n\tChange each occurrence," 
	responseList = responseList "\n\tGlobal change," 
	responseList = responseList "\n\tAdd to Dict,"  
	responseList = responseList "\n\tHelp," 
	responseList = responseList "\n\tQuit" 
	responseList = responseList "\n\tCR to ignore: "
	printf("%s", responseList)

} # end of BEGIN procedure

# main procedure, executed for each line in wordlist.
#	Purpose is to show misspelled word and prompt user
#	for appropriate action.

{
# assign word to misspelling
	misspelling = $1 
	response = 1
	++word
# print misspelling and prompt for response
	while (response !~ /(^[cCgGaAhHqQ])|^$/ ) {
		printf("\n%d - Found %s (C/G/A/H/Q/):", word, misspelling)
		getline response < "-"
	}
# now process the user's response
# CR - carriage return ignores current word 
# Help
	if (response ~ /[Hh](elp)?/) {
	# Display list of responses and prompt again.
		printf("%s", responseList)
		printf("\n%d - Found %s (C/G/A/Q/):", word, misspelling)
		getline response < "-"
	}
# Quit
	if (response ~ /[Qq](uit)?/) exit
# Add to dictionary
	if ( response ~ /[Aa](dd)?/) { 
		dict[++dictEntry] = misspelling
	}
# Change each occurrence
	if ( response ~ /[cC](hange)?/) {
	# read each line of the file we are correcting
		newspelling = ""; changes = ""
		while( (getline < spellsource) > 0){
		# call function to show line with misspelled word
		# and prompt user to make each correction 
			make_change($0)
		# all lines go to temp output file
			print > spellout
		}	
	# all lines have been read 
	# close temp input and temp output file
		close(spellout)
		close(spellsource)
	# if change was made
		if (changes){ 
		# show changed lines
			for (j = 1; j <= changes; ++j)
				print changedLines[j]
			printf ("%d lines changed. ", changes) 
		# function to confirm before saving changes
			confirm_changes()
		}
	}
# Globally change
	if ( response ~ /[gG](lobal)?/) {
	# call function to prompt for correction
	# and display each line that is changed.
	# Ask user to approve all changes before saving.
		make_global_change()
	}	
} # end of Main procedure

# END procedure makes changes permanent.
# It overwrites the original file, and adds words
# to the dictionary.
# It also removes the temporary files.

END {
# if we got here after reading only one record, 
# no changes were made, so exit.
	if (NR <= 1) exit
# user must confirm saving corrections to file
	while (saveAnswer !~ /([yY](es)?)|([nN]o?)/ ) {
		printf "Save corrections in %s (y/n)? ", SPELLFILE
		getline saveAnswer < "-"
	}
# if answer is yes then mv temporary input file to SPELLFILE
# save old SPELLFILE, just in case
	if (saveAnswer ~ /^[yY]/) {
		system("cp " SPELLFILE " " SPELLFILE ".orig")
		system("mv " spellsource " " SPELLFILE)
	}
# if answer is no then rm temporary input file
	if (saveAnswer ~ /^[nN]/)
		system("rm " spellsource) 

# if words have been added to dictionary array, then prompt
# to confirm saving in current dictionary. 
	if (dictEntry) {
		printf "Make changes to dictionary (y/n)? "
		getline response < "-"
		if (response ~ /^[yY]/){
		# if no dictionary defined, then use "dict"
			if (! SPELLDICT) SPELLDICT = "dict"
		
		# loop through array and append words to dictionary
			sub(/^\+/, "", SPELLDICT)
			for ( item in dict )
				print dict[item] >> SPELLDICT
			close(SPELLDICT)
		# sort dictionary file 
			system("sort " SPELLDICT "> tmp_dict")
			system("mv " "tmp_dict " SPELLDICT)
		}
	}
# remove word list
	system("rm sp_wordlist")
} # end of END procedure

# function definitions

# make_change -- prompt user to correct misspelling 
#		 for current input line.  Calls itself
# 		 to find other occurrences in string.
# 	stringToChange -- initially $0; then unmatched substring of $0
# 	len -- length from beginning of $0 to end of matched string 
# Assumes that misspelling is defined. 

function make_change (stringToChange, len,	# parameters
	line, OKmakechange, printstring, carets)	# locals
{
# match misspelling in stringToChange; otherwise do nothing 
  if ( match(stringToChange, misspelling) ) {
  # Display matched line 
	printstring = $0
	gsub(/\t/, " ", printstring)
	print printstring
	carets = "^"
	for (i = 1; i < RLENGTH; ++i)
		carets = carets "^"
	if (len)
		FMT = "%" len+RSTART+RLENGTH-2 "s\n"
	else
		FMT = "%" RSTART+RLENGTH-1 "s\n"
	printf(FMT, carets)
  # Prompt user for correction, if not already defined
	if (! newspelling) {
		printf "Change to:"
		getline newspelling < "-"
	}
  # A carriage return falls through
  # If user enters correction, confirm  
	while (newspelling && ! OKmakechange) {
		printf ("Change %s to %s? (y/n):", misspelling, newspelling)
		getline OKmakechange < "-"
		madechg = ""
	# test response
		if (OKmakechange ~ /[yY](es)?/ ) {
		# make change (first occurrence only)
			madechg = sub(misspelling, newspelling, stringToChange)
		}
		else if ( OKmakechange ~ /[nN]o?/ ) {
			# offer chance to re-enter correction 
			printf "Change to:"
			getline newspelling < "-"
			OKmakechange = ""
		}
	} # end of while loop

   # if len, we are working with substring of $0
	if (len) {
	# assemble it
		line = substr($0,1,len-1)
		$0 = line stringToChange
	}
	else {
		$0 = stringToChange
		if (madechg) ++changes
	}

   # put changed line in array for display
	if (madechg) 
		changedLines[changes] = ">" $0

   # create substring so we can try to match other occurrences
	len += RSTART + RLENGTH
	part1 = substr($0, 1, len-1)
	part2 = substr($0, len)
   # calls itself to see if misspelling is found in remaining part 
	make_change(part2, len) 

  } # end of if

} # end of make_change()

# make_global_change --
#		prompt user to correct misspelling 
#		for all lines globally.  
#		Has no arguments
# Assumes that misspelling is defined. 

function make_global_change(    newspelling, OKmakechange, changes)
{
# prompt user to correct misspelled word
   printf "Globally change to:"
   getline newspelling < "-"

# carriage return falls through
# if there is an answer, confirm 
   while (newspelling && ! OKmakechange) {
		printf ("Globally change %s to %s? (y/n):", misspelling,
				newspelling)
		getline OKmakechange < "-"
	# test response and make change
		if (OKmakechange ~ /[yY](es)?/ ) {
		# open file, read all lines 
			while( (getline < spellsource) > 0){
			# if match is found, make change using gsub
			# and print each changed line.
				if ($0 ~ misspelling) {
					madechg = gsub(misspelling, newspelling)
					print ">", $0
					changes += 1  # counter for line changes
				}
			# write all lines to temp output file
				print > spellout
			} # end of while loop for reading file

		# close temporary files
			close(spellout)
			close(spellsource)
		# report the number of changes	
			printf ("%d lines changed. ", changes) 
		# function to confirm before saving changes
			confirm_changes()
		} # end of if (OKmakechange ~ y) 

	# if correction not confirmed,  prompt for new word
		else if ( OKmakechange ~ /[nN]o?/ ){
			printf "Globally change to:"
			getline newspelling < "-"
			OKmakechange = ""
		}

  } # end of while loop for prompting user for correction

} # end of make_global_change()

# confirm_changes --  
#		confirm before saving changes

function confirm_changes(  savechanges) {
# prompt to confirm saving changes
	while (! savechanges ) {
		printf ("Save changes? (y/n)")
		getline savechanges < "-"
	}
# if confirmed, mv output to input
	if (savechanges ~ /[yY](es)?/)
		system("mv " spellout " " spellsource) 
}