: Use /bin/sh
#
# $Id: fixdict.X,v 1.10 91/07/03 18:20:35 geoff Exp $
#
# Copyright 1987, 1988, 1989, by Geoff Kuenning, Manhattan Beach, CA
# Permission for non-profit use is hereby granted.
# All other rights reserved.
# See "version.h" for a more complete copyright notice.
#
# $Log:	fixdict.X,v $
# Revision 1.10  91/07/03  18:20:35  geoff
# Don't use the ":-" notation in defining TMPDIR, since some
# braindamaged Bourne shells don't handle it.
# 
# Revision 1.9  89/04/28  01:07:58  geoff
# Change Header to Id;  nobody cares about my pathnames.
# 
# Revision 1.8  88/12/26  02:24:36  geoff
# Update the copyright notice.
# 
# Revision 1.7  88/02/20  23:10:48  geoff
# Fix the usage of the -e switch to specify standard input properly.
# Remove an unneeded sort.
# 
# Revision 1.6  87/09/24  23:24:03  geoff
# Get rid of colons in the optional-variable setting (Israel Pinkas).
# 
# Revision 1.5  87/09/14  22:38:28  geoff
# Add copyright comments
# 
# Revision 1.4  87/07/20  23:21:16  geoff
# Get rid of the EXPAND stuff;  it's obsolete.  Add DEFHASH and SORTTMP
# support.  Look in the current directory for DEFHASH first.
# 
# Revision 1.3  87/06/07  14:47:22  geoff
# Make LIBDIR auto-configurable
# 
# Revision 1.2  87/05/27  23:16:08  geoff
# Update expand script usage
# 
# Revision 1.1  87/04/19  22:25:04  geoff
# Initial revision
# 
#
#	Add capitalization information to an ispell dictionary
#
#	Usage:
#
#	fixdict dict-file
#
#	Requires availability of UNIX spell.  The new dictionary is
#	rewritten in place.  A list of words that couldn't be
#	resolved (because spell doesn't know them) is written to
#	standard output.  This list appears in lowercase in the
#	dictionary, and if there are any errors they must be edited
#	by hand.
#
#	The final dictionary appears in expanded form and must be
#	passed through munchlist to regenerate suffixes.
#
# Configuration.  The !!NAME!! tokens appear to be placeholders that are
# substituted before this script is installed (the revision log mentions
# making LIBDIR auto-configurable) -- TODO confirm against the Makefile.
# LIBDIR is the fallback directory searched for the hash file, and
# DEFHASH is the name of that hash file.
LIBDIR=!!LIBDIR!!
DEFHASH=!!DEFHASH!!
# Extra arguments for sort: "-T" plus a scratch directory.  The value is
# expanded unquoted further down so that it splits into two words.
SORTTMP="-T ${TMPDIR-/usr/tmp}"		# !!SORTTMP!!
# Scratch files are named ${TMP}a through ${TMP}e.  They live in $TMPDIR
# when that variable is set, otherwise in /tmp, and carry the shell's
# process id ($$).  The "-" form (rather than ":-") is deliberate; see
# revision 1.10 in the log above.
TDIR=${TMPDIR-/tmp}
TMP=${TDIR}/fix$$

#
#	Locate the hash file to hand to ispell.  A readable copy in the
#	current directory wins, because this script is mostly run during
#	installation; otherwise fall back to the copy under LIBDIR.
#
if [ -r "$DEFHASH" ]
then
    DICT="$DEFHASH"
else
    DICT="$LIBDIR/$DEFHASH"
fi

#
# Refuse to run without a dictionary argument; otherwise the input
# redirections below fail with a confusing shell error.
#
if [ $# -lt 1 ]
then
    echo "Usage: fixdict dict-file" 1>&2
    exit 1
fi

#
# Remove the scratch files if we are interrupted (signals 1, 2 and 15).
# The command is single-quoted so that ${TMP} is expanded, inside double
# quotes, only when the trap fires; a scratch directory whose name
# contains blanks is then still handled correctly.
#
trap '/bin/rm -f "${TMP}"*; exit 1' 1 2 15
#
# Run the dictionary through "ispell -e", fold the result to lowercase,
# and collect the words that spell rejects in that form in ${TMP}a.
# All file names are quoted so that paths with blanks survive.
#
ispell -e -d "$DICT" -p /dev/null < "$1" \
  | tr '[A-Z]' '[a-z]' \
  | spell > "${TMP}a"
#
# ${TMP}a contains all the words that spell doesn't like in lowercase.
# Re-check those words in uppercase: whatever spell still rejects is a
# word it doesn't know at all.  Fold that list back to lowercase and
# leave it in ${TMP}b.  (File names are quoted so that a scratch
# directory containing blanks works.)
#
tr '[a-z]' '[A-Z]' < "${TMP}a" | spell | tr '[A-Z]' '[a-z]' > "${TMP}b"
#
# The wrongly-capitalized words are those that spell didn't object to
# in the last step, i.e. the lines of ${TMP}a that are not in ${TMP}b.
# Produce a list of them and capitalize the first letter of each (one
# sed substitution per letter, anchored at the start of the line).
# Save this list in ${TMP}c.
#
comm -23 "${TMP}a" "${TMP}b" \
  | sed 's/^a/A/;s/^b/B/;s/^c/C/;s/^d/D/;s/^e/E/;s/^f/F/;s/^g/G/;s/^h/H/
     s/^i/I/;s/^j/J/;s/^k/K/;s/^l/L/;s/^m/M/;s/^n/N/;s/^o/O/;s/^p/P/
     s/^q/Q/;s/^r/R/;s/^s/S/;s/^t/T/;s/^u/U/;s/^v/V/;s/^w/W/;s/^x/X/
     s/^y/Y/;s/^z/Z/' > "${TMP}c"
#
# Run the first-letter-capitalized words through spell again.  The ones
# it still objects to are saved in ${TMP}d.  (File names are quoted so
# that a scratch directory containing blanks works.)
#
spell "${TMP}c" > "${TMP}d"
#
# Extract the words which were correctly capitalized at the first letter
# (in ${TMP}c but not in ${TMP}d), combine them with an all-capitals
# version of the ones that weren't, and put the sorted result into
# ${TMP}e.  $SORTTMP is deliberately left unquoted: it holds "-T dir"
# and must split into two arguments.
#
(comm -23 "${TMP}c" "${TMP}d";  tr '[a-z]' '[A-Z]' < "${TMP}d") \
  | sort $SORTTMP -o "${TMP}e"
#
# At this point, ${TMP}b contains the words that spell just plain doesn't
# like, and ${TMP}e contains the words that are now capitalized correctly.
# The c and d scratch files are no longer needed.  Only the prefix is
# quoted, so the [cd] pattern still globs.
#
/bin/rm "${TMP}"[cd]
#
# Put it all together, rewriting the dictionary in place:  take the
# lowercased ispell output, drop every word listed in ${TMP}a, and merge
# what is left with ${TMP}b and ${TMP}e using a case-folding sort.
# "$1" and the scratch names are quoted so that paths with blanks work;
# $SORTTMP is deliberately unquoted because it must split into "-T dir".
#
# NOTE(review): the output file is also the file ispell reads at the head
# of the pipeline, so this relies on sort not truncating "$1" before the
# input has been consumed -- confirm for the local sort implementation.
#
ispell -e -d "$DICT" -p /dev/null < "$1" \
  | tr '[A-Z]' '[a-z]' \
  | sort $SORTTMP \
  | comm -23 - "${TMP}a" \
  | sort $SORTTMP -f -o "$1" - "${TMP}b" "${TMP}e"
#
# Finally, write the list of words that have questionable capitalization
# (the ones spell does not know at all) to the standard output, then
# remove all remaining scratch files.  Only the prefix is quoted, so the
# trailing * still globs.
#
cat "${TMP}b"
/bin/rm "${TMP}"*
