% !TEX TS-program = lualatex
% arara: lualatex
% arara: lualatex
\documentclass[11pt]{article}
\usepackage{lua-check-hyphen}

% This entire package is placed under the terms of the
% LaTeX Project Public License, version 1.3 or later
% (http://www.latex-project.org/lppl.txt).
% It has the status "maintained".
%
% Author: Mico Loretan (loretan dot mico at gmail dot com)
% Date: 2015/10/26

% The file selnolig.tex contains the user guide to the
% selnolig package.

% --- Engine guard ---------------------------------------------------
% This guide builds only under Lua(La)TeX. On any other engine, write
% a notice via \typeout and stop reading this file.
\usepackage{ifluatex}
\ifluatex
\else
  \typeout{ =========================================}
  \typeout{ The file selnolig.tex must be compiled }
  \typeout{ using LuaLaTeX. Exiting immediately. }
  \typeout{ =========================================}
  \endinput
\fi

% --- Fonts ----------------------------------------------------------
\usepackage{fontspec}

% Make the control sequence named "xetex_suppressfontnotfounderror:D"
% an alias of \suppressfontnotfounderror when that name is defined,
% and of \luatexsuppressfontnotfounderror otherwise.
\ifdefined\suppressfontnotfounderror
  \expandafter\let\csname xetex_suppressfontnotfounderror:D\endcsname
    \suppressfontnotfounderror
\else
  \expandafter\let\csname xetex_suppressfontnotfounderror:D\endcsname
    \luatexsuppressfontnotfounderror
\fi

% Main text font, with a feature file and old-style figures.
\setmainfont[
  FeatureFile = gpp-ft.fea,
  Numbers     = OldStyle,
  Ligatures   = {TeX, Common, Rare},
  BoldFont    = {Garamond Premier Pro},
  ItalicFont  = {Garamond Premier Pro Italic}
]{Garamond Premier Pro}

% Sans and mono fonts; common ligatures are switched off for both.
\setsansfont[
  Scale     = MatchLowercase,
  Ligatures = NoCommon
]{Myriad Pro}
\setmonofont[
  Scale     = 0.81,
  Ligatures = NoCommon
]{Consolas}

% Extra font families, selected in the text via \ebg, \sabon,
% and \blackletter.
\newfontfamily\ebg[
  Numbers        = OldStyle,
  Ligatures      = {TeX, Common, Discretionary, Historic},
  ItalicFeatures = {Scale = MatchLowercase},
  ItalicFont     = {EB Garamond 12 Italic}
]{EB Garamond 12 Regular}
\newfontfamily\sabon[
  Ligatures = {Common,Rare,TeX},
  Scale     = MatchLowercase
]{Sabon Next LT Pro}
\newfontfamily\blackletter[
  Ligatures = NoCommon
]{UnifrakturMaguntia}

% Next come further packages, among them babel and
% -- of course! -- selnolig itself.
\usepackage{geometry}
\usepackage{tabularx}
\usepackage{booktabs}
\usepackage{ragged2e}
\usepackage[english=american]{csquotes}
\usepackage[ngerman,english]{babel}
\usepackage[ngerman,english,hdlig,broadf]{selnolig}

% --- Document-local ligature rules -----------------------------------
% Break up st ligatures in the German words quoted in this user guide.
% In each rule, "|" in the second argument marks where a ligature is
% to be broken.
\nolig{Kunststo}{Kuns|ts|to}                  % Kunststoff
\nolig{Bausto}{Baus|to}                       % Baustoff
\nolig{streif}{s|treif}                       % streifte
\nolig{instrum}{ins|trum}                     % Zupfinstrument
\nolig{justiz}{jus|tiz}                       % Strafjustiz
\nolig{Konstanz}{Kons|tanz}
\nolig{Institut}{Ins|titut}
\nolig{Wasserstoff}{Wassers|toff}
\nolig{stiefl}{s|tiefl}                       % stiefle
\nolig{straff}{s|traff}
\nolig{aufisst}{auf|iss|t}                    % suppress st liga as well
\nolig{löste}{lös|te}
\nolig{straße}{s|traße}
\nolig{steifbein}{s|teifbein}
\nolig{steiflehnig}{s|teiflehnig}
\nolig{strafbar}{s|trafbar}
\nolig{belustigt}{belus|tigt}
\nolig{flichst}{flichs|t}                     % flichst, höflichst
\nolig{trittst}{tritts|t}
\nolig{vortrefflichsten}{vortrefflichs|ten}
\nolig{schnellste}{schnells|te}
\nolig{sträflich}{s|träflich}
\nolig{Surftest}{Surftes|t}
\nolig{bestanden}{bes|tanden}
\nolig{leistung}{leis|tung}
\nolig{Faust}{Faus|t}

% The broadf option activates the rule
%    \nolig{ffing}{ff|ing}
% Undo its action for a couple of German words:
\keeplig{ffingrig}
\keeplig{ffinger}

\nolig{tzeit}{t|zeit}                         % for Fraktur example in section 6

% --- Hyphenation exceptions -------------------------------------------
\hyphenation{charles-town}
% Hyphenation points for some German words:
\hyphenation{
  jugend-treff-lei-ter
  vor-treff-lich
  vor-treff-li-che
  auf-for-de-rung
  hin-auf-blick-te
  auf-füh-ren
  auf-hor-chen-den
  her-auf-kom-men
  un-er-schöpf-lich
  un-be-greif-lich
  kauf-leu-ten
  schlaf-lo-sig-keit
  ver-werf-li-che
}

\debugon

% A few more packages are loaded below.
% --- Micro-typography, columns, footnotes -----------------------------
\usepackage{microtype}
\LoadMicrotypeFile{pad}
\usepackage{multicol}
\usepackage[bottom,splitrule]{footmisc}

% --- Section headings and table of contents ---------------------------
\usepackage{sectsty}
\allsectionsfont{\mdseries}

\usepackage{tocloft}
\cftsetindents{sec}{0em}{1.75em}
\cftsetindents{subsec}{1.75em}{2em}
\cftsetindents{subsubsec}{3.75em}{3em}
\setlength\cftbeforesecskip{1ex}
\renewcommand{\cfttoctitlefont}{\large}
\renewcommand{\cftsecfont}{\mdseries}
\renewcommand{\cftsubsecfont}{\mdseries}
\renewcommand{\cftsecpagefont}{\mdseries}
\renewcommand{\cftsubsecpagefont}{\mdseries}
\renewcommand{\cftsecleader}{\cftdotfill{\cftdotsep}}

% --- Verbatim material --------------------------------------------------
% From here on, |...| is a short form for inline verbatim text.
\usepackage{fancyvrb}
\DefineShortVerb{\|}

% --- "LuaLaTeX" and "XeLaTeX" logos -------------------------------------
% Override four of metalogo's spacing parameters; the values noted as
% "default" are the ones being replaced.
\usepackage{metalogo}
\makeatletter
\def\xl@drop@TeX@e{0.34ex}     % default: 0.5ex
\def\xl@drop@Xe@e{0.34ex}      % default: 0.5ex
\def\xl@kern@La@La{-.33em}     % default: -0.36ex
\def\xl@kern@LaTeX@aT{-.16em}  % default: -0.15ex
\makeatother

% --- Shorthand macros used throughout the guide -------------------------
\newcommand{\pkg}[1]{\textsf{#1}}                  % package name, sans serif
\newcommand{\opt}[1]{\texttt{#1}}                  % option name, monospaced
\newcommand{\cmmd}[1]{\texttt{\textbackslash #1}}  % macro name with leading backslash
\newcommand\utf{\textsc{utf-8}}                    % "utf-8" in small caps

% --- \appendix --------------------------------------------------------
% Keep the original under \oldappendix. The new version first adds a
% vertical gap and an "Appendices" line to the table of contents, then
% prints an "Appendices" heading, and finally calls the original.
\let\oldappendix\appendix
\renewcommand\appendix{%
  %\clearpage
  \addtocontents{toc}{\protect{\vspace{1\baselineskip}}}
  \addtocontents{toc}{\protect{\mdseries Appendices\par}}
  \noindent {\Large Appendices}
  \oldappendix}

% --- Paragraph-level settings -------------------------------------------
\frenchspacing
\clubpenalty10000
\widowpenalty10000

% --- Colour, hyperlinks, cross-references -------------------------------
% cleveref is loaded after hyperref.
\usepackage[svgnames]{xcolor}
\usepackage[colorlinks=true,linktocpage, allcolors=DarkBlue]{hyperref}
\usepackage[capitalize,nameinlink,noabbrev]{cleveref}

% The macros \selnoligpackagename,
% \selnoligpackageversion, and \selnoligpackagedate
% used below are defined in 'selnolig.sty'.
\title{The \selnoligpackagename\ package: \\ Selective suppression of typographic ligatures\thanks{ Current version: \selnoligpackageversion. Features of the \pkg{selnolig} package are subject to change without prior notice. \newline\hspace*{\parindent} The main text font used in this document is Garamond Premier Pro.
{\ebg EB Garamond} is used for words that use the {\ebg fb, fh, fj, fk, ffb, ffh, ffj, ffk, \emph{es}, and \emph{sk}} ligatures. \enquote{Common}, \enquote{discretionary}, and \enquote{historic} typographic ligatures are enabled for these text fonts\textemdash \emph{and} are suppressed selectively using the rules of the \pkg{selnolig} package.} } \author{Mico Loretan\thanks{ \href{mailto:loretan.mico@gmail.com}{loretan dot mico at gmail dot com}.}} \date{\selnoligpackagedate} \begin{document} \VerbatimFootnotes \pagenumbering{roman} \selnoligoff \maketitle \selnoligon \begin{abstract} The \pkg{selnolig} package suppresses typographic ligatures selectively, i.e., based on predefined search patterns. The search patterns focus on ligatures deemed inappropriate because they span morpheme boundaries. For example, the word \opt{shelfful}, which is mentioned in the \TeX book as a word for which the ff~ligature might be inappropriate, is automatically typeset as shelfful rather than as \uselig{shelfful}. \newline\hspace*{\parindent} For English and German language documents, the \pkg{selnolig} package provides extensive rules for the selective suppression of so-called \enquote{common} ligatures. These comprise the ff, fi, fl, ffi, and~ffl ligatures as well as the ft and~fft ligatures. Other f-ligatures, such as {\ebg\uselig{fb}}, {\ebg\uselig{fh}}, {\ebg\uselig{fj}} and {\ebg\uselig{fk}}, are suppressed globally, while making exceptions for names and words of non-English/German origin, such as {\ebg Kafka} and {\ebg fjord}. \newline\hspace*{\parindent} For English language documents, the package further provides ligature suppression rules for a number of so-called \enquote{discretionary} or \enquote{rare} ligatures, such as~ct, st, and~sp. \newline\hspace*{\parindent} The \pkg{selnolig} package requires use of the \LuaLaTeX\ format provided by a recent \TeX\ distribution, e.g., \TeX Live\,2013 and MiK\TeX\,2.9. 
\end{abstract} \tocloftpagestyle{empty} \tableofcontents \clearpage \pagenumbering{arabic} \section{Introduction} The ability of \TeX\ and Friends to use typographic ligatures has long been cherished by its users. Indeed, the automated and transparent use of typographic ligatures by \TeX\ and Friends is often offered up as one of the reasons for using these programs to obtain high-quality typeset output. However, even though the automatic use of typographic ligatures is highly desirable in general, there are words for which the use of certain typographic ligatures may not be appropriate. The \TeX book observes, on page~19, that the word |shelfful| may look better if it is typeset as \enquote{shelfful}, i.e., \emph{without} the ff-ligature, rather than as \enquote{\uselig{shelfful}}. Some other English-language words that would generally be considered to be good candidates for non-use of ligatures are \uselig{cufflink} and \uselig{offload}; compare their appearance with that of cufflink and offload. Observe that all three of these words are composed of two meaning-bearing particles or \emph{morphemes}: the first morpheme ends in an~\enquote{f\,} or~\enquote{ff\,} while the second morpheme starts with either an~\enquote{f\,} (in the case of shelfful) or an~\enquote{l} (in the cases of cufflink and offload). A \href{http://en.wikipedia.org/wiki/Morpheme}{morpheme}, briefly stated, is the smallest linguistic unit within a word that bears distinct meaning; all words\textemdash other than nonsense words, I suppose\textemdash contain at least one morpheme. The words apple and orange contain one morpheme each, and the words apples, oranges, shelfful, cufflink, and offload each contain \emph{two} morphemes. The main purpose of the \pkg{selnolig} package is to provide methods and rules for an automated yet selective (rather than global) suppression of typographic ligatures that span certain morpheme boundaries. 
For English language documents, the need to suppress typographic ligatures that span morpheme boundaries does not appear to be a hugely pressing typographic concern, possibly because English doesn't feature composite words that frequently. However, in other languages, such as German, composite words are much more common. In these languages, there is naturally a much greater potential for composite words to feature instances of |ff|, |fi|, |fl| etc.\ character pairs that span morpheme boundaries. In German typography, a ligature that spans a morpheme boundary appears to be something that should be avoided at (nearly) all cost, presumably because the presence of such ligatures has the potential to impair seriously the intelligibility of the composite words.%\footnote{For German texts, I believe that the \emph{Duden} provides authoritative backing for questions related to selective ligature suppression. For English texts, I'm not aware of the existence of a document, issued by an official or quasi-official body, that prescribes whether ligatures that span morpheme boundaries should be suppressed. If anybody can provide such a reference, I would be happy to list it.} \TeX\ and Friends offer several methods for suppressing ligatures on a case-by-case basis. 
In \LaTeX, there are three basic methods for suppressing ligatures: (i)~insertion of an \enquote{empty atom}, |{}|, between the characters whose ligature should be avoided; (ii)~insertion of an explicit italic correction, |\/|; and (iii)~insertion of an explicit \enquote{kern}, e.g., |\kern0pt| or \Verb/\hspace{0pt}/.\footnote{Note, though, that the first ligature suppression method, \Verb/{}/, does \emph{not} work if the document is compiled with \LuaLaTeX, and that the italic-correction method can produce inferior results if the text is being typeset in, well, the italic font shape.} The \pkg{babel} package, when used with the \opt{ngerman} or \opt{german} options, offers the \enquote{shortcut} macro \Verb/"|/ to suppress ligatures. A drawback of these ligature suppression methods is that they must be applied separately to each and every occurrence of all words that contain unwanted ligatures. As such, these case-by-case methods are both time-intensive and tedious. Moreover, there's always a residual risk that some words for which ligatures should be suppressed will be overlooked in the editing process. There are also several preprocessor-type packages and scripts\textemdash I mention \pkg{rmligs} and \pkg{Ligatures-German} in \cref{sec:thanks} below, but others exist as well\textemdash that scan the input file(s) and insert marks (usually, but not necessarily, the \pkg{babel} \Verb/"|/ shortcuts) in the places where ligatures should be avoided. While ingenious, these preprocessor-based solutions suffer from several drawbacks which, taken together, may help explain why they do not appear to be in widespread use despite their usefulness. First, they add complexity to the document preparation process. E.g., if the document is being edited inside an IDE or integrated development environment, the input files have to be closed prior to running the preprocessor scripts on them; then the files have to be re-opened in order to recompile them. 
Second, the presence of \Verb+"|+ macros in the input may interfere with the work of programs such as spell checkers. Third, \textsc{afaict} none of the ligature-suppressing preprocessor packages I'm familiar with have been written to handle ligature suppression for English language texts. Fourth, their scope generally seems to be limited to the most basic f-ligatures (ff, fi, fl, ffi, and ffl), making them less than fully useful for fonts that provide further f-ligatures, such as ft and fft, or \enquote{rare} ligatures such as st and sp. Fifth, they usually require access to auxiliary programs (e.g., a \textsc{unix} environment and a \opt{perl} distribution in the case of the \pkg{rmligs} package) that need not be present on a given user's computer. What has \emph{not} been available so far is a \LaTeX\ package that performs selective ligature suppression while avoiding the drawbacks associated with the preprocessor approach. Such a package should provide lists of language-specific word patterns for which ligatures should be suppressed, and it should systematically discover, during compilation, all words to which these patterns apply and proceed to suppress the indicated ligatures. Such a package should, at a minimum, be able to handle the basic f-ligatures (ff, fi, fl, ffi, and ffl); given the increasing prevalence of ligature-rich OpenType fonts, it would be useful if the \LaTeX\ package were also able to suppress additional f-ligatures, such as ft and fft, as well as rare ligatures. The package should also be reasonably easy to extend, in the sense that users should be able to augment or modify the ligature-suppression rules to suit their documents' characteristics. The \pkg{selnolig} package is meant to meet all of these goals and criteria. 
The \pkg{selnolig} package provides rules to suppress \emph{selectively} the following f-ligatures, for both English and German documents: ff, fi, fl, ffi, and ffl\textemdash the \enquote{standard} f-ligatures that should be familiar to most users of Computer Modern fonts\textemdash as well as the~ft and~fft ligatures. The latter two ligatures, while not provided by the Computer Modern and Latin Modern font families, are frequently available in \href{http://en.wikipedia.org/wiki/Serif#Old_Style}{oldstyle} (also known as \enquote{Garalde}) font families.\footnote{In some oldstyle font families, the ligatures \enquote{ft} and \enquote{fft} are rendered as \enquote{\sabon ft} and \enquote{\sabon fft}, respectively.} Oldstyle-type font families generally feature a great variety of typographic ligatures. Given the beauty and growing popularity of these font families, it's important to be able to make good use of many of their features, including the presence of ligatures outside the \enquote{basic five} set. In addition to suppressing ligatures selectively for the f-ligatures mentioned above, the ligatures {\ebg \uselig{fb}, \uselig{fh}, \uselig{fj}, \uselig{fk},} and \emph{\uselig{ij}} are suppressed \emph{globally} for both English and German language documents. Exceptions are provided, however, so as not to suppress these ligatures for selected words of \emph{non-English\slash German} origin, such as {\ebg fjord, fjell, Prokofjew, Kafka,} and \emph{rijsttafel}. \enlargethispage{0.5\baselineskip} For English documents, the \pkg{selnolig} package recognizes two further options, \opt{broadf} and \opt{hdlig}. If \opt{broadf} is set, additional f-ligatures will be suppressed selectively. If the \opt{hdlig} option is set, selective ligature suppression is performed on discretionary\slash rare ligatures such as ct, st, sp, {\ebg\emph{sk}}, \emph{th}, \emph{at}, \emph{et}, \emph{ll}, \emph{as}, \emph{\ebg es}, \emph{is}, and~\emph{us}. 
No rules are currently provided to suppress historic and/or discretionary\slash rare ligatures for German documents.\footnote{A quick remark on the classification of typographic ligatures. The f-ligatures are generally called \enquote{common} in most font families. Beyond this group, though, there appears to be little or no standardization across OpenType fonts as to which typographic ligatures should be labelled \enquote{historic} and which ones should be labelled \enquote{discretionary}\slash\enquote{rare}. For instance, the fonts Latin Modern Roman, Garamond Premier Pro, and Hoefler Text report having \enquote{only} discretionary ligatures. In contrast, the fonts Junicode, Cardo, EB Garamond, and Palatino Linotype all report having both historic and discretionary ligatures. The name of the package option |hdlig| is supposed to convey a sense of agnosticism as to how these non-common ligatures should be labelled.} Of course, no claim as to the completeness of either the English or German language list is or can be made. Hence, the \pkg{selnolig} package also makes it straightforward for users to provide their own, supplemental, ligature suppression rules to treat words that occur in their documents but aren't yet covered by the package. Please feel free to email me such words, so that I can augment and update the package's ligature suppression rules suitably. A suggested template for reporting issues with the \pkg{selnolig} package is provided in \cref{sec:template}. The \pkg{selnolig} package further provides supplemental hyphenation exception lists for both English and German language words. The words in these lists are generally composite and contain one or more typographic ligatures that should be suppressed. The remainder of this document is organized as follows. \Cref{sec:hurry} provides instructions for loading the \pkg{selnolig} package and making one's document(s) suitable for compilation under \LuaLaTeX. 
The package's overall approach to the suppression of ligatures that span morpheme boundaries is explained in \cref{sec:approach}, the user macros are presented in \cref{sec:structure}, and options that affect the package's workings are discussed in \cref{sec:options}. \Cref{sec:further} addresses further issues that may arise when looking to break up typographic ligatures. The package's ligature suppression rules for English and German language documents are listed in \cref{sec:eng-listing,sec:germ-listing}. The code of the package's main ``style'' file, \pkg{selnolig.sty}, and Lua code file, \pkg{selnolig.lua}, is listed in \cref{sec:sty,sec:luacode}. \Cref{sec:template} provides a suggested template for reporting bugs and other issues with the package. Finally, in \cref{sec:budd} I provide lists of ligature-containing words caught by \pkg{selnolig}'s rules in two English-language and three German-language literary classics. The English pieces are \emph{Call of the Wild} and \emph{The Sea Wolf}, both by Jack London. The German pieces are Thomas Mann's \emph{Die Buddenbrooks} and Goethe's \emph{Faust}, both \emph{Part~I} and \emph{Part~II}. (Of course, I make no claim whatsoever as to any kind of statistical representativeness of this selection!) Unsurprisingly, the German pieces contain far more words for which ligatures are broken up by \pkg{selnolig} than do the English pieces. \section{I'm in a hurry! 
How do I start using this package?} \label{sec:hurry} \subsection[How do I load the selnolig package?]{How do I load the \pkg{selnolig} package?} \begin{itemize} \item If your document is in English and you want to enable a \enquote{basic} set of rules to suppress f-ligatures selectively, load the package by issuing the following instruction in the preamble of your document: \begin{Verbatim} \usepackage[english]{selnolig} \end{Verbatim} Synonymous options are \opt{UKenglish}, \opt{british}, \opt{USenglish}, \opt{american}, \opt{cana\-dian}, \opt{australian}, and \opt{new\-zea\-land}. If you want to load a set of f-ligature suppression rules that's broader than the set that's enabled by default, be sure to also specify the option \opt{broadf}; see \cref{sec:eng-opt}. If \enquote{historic} and/or \enquote{discretionary} ligatures (e.g., ct, st, sp, \emph{th}, \emph{\uselig{ij}}, \emph{ll}, \emph{\ebg sk}, \emph{at}, \emph{et}, \emph{as}, \emph{\ebg es}, \emph{is}, and~\emph{us}) are enabled for your text font(s), be sure to specify the option~\opt{hdlig}. The options \opt{broadf} and \opt{hdlig} may be specified independently. \enlargethispage{1\baselineskip} \item If your document is written in German, load the package as follows: \begin{Verbatim} \usepackage[ngerman]{selnolig} \end{Verbatim} Synonymous language options are \opt{german}, \opt{austrian}, \opt{naustrian}, \opt{swissgerman}, and \opt{swiss}. \item If you load the package \emph{without} an explicit language option, i.e., as \begin{Verbatim} \usepackage{selnolig} \end{Verbatim} but if one or more of the language options noted above are specified as options in the \cmmd{documentclass} instruction, \LaTeX\ will pass these options on to the \pkg{selnolig} package. \item If no language options are set either when the package is loaded or as options in the \cmmd{documentclass} instruction, you will need to provide your own ligature suppression rules. 
This approach is called for if you write in a language other than German or English\textemdash the only two languages currently supported by the \pkg{selnolig} package\textemdash and are able to devise your own ligature suppression rules using \pkg{selnolig}'s \cmmd{nolig} and \cmmd{keeplig} macros. \end{itemize} \subsection{Any hints on how to get started with \LuaLaTeX?} \begingroup \renewcommand\theenumi{(\roman{enumi})} \renewcommand\labelenumi\theenumi The ligature suppression macros of the \pkg{selnolig} package \emph{require} the use of \LuaLaTeX; they will not work under either pdf\LaTeX\ or \XeLaTeX. If the \pkg{selnolig} package is not run under \LuaLaTeX, a warning message will be issued and only the package's supplemental hyphenation rules will be available to the user. If you've been using pdf\LaTeX\ until now, the requirement to use \LuaLaTeX\ will likely force you to make some changes to your existing documents. Fortunately, these changes should be minor and straightforward to implement because \LuaLaTeX\ is, for the most part, a strict superset of pdf\LaTeX. Almost all documents that compile correctly under pdf\LaTeX\ should also compile correctly under \LuaLaTeX. The two most important changes you'll need to make are: \begin{enumerate} \item Do not load either the |inputenc| or the |fontenc| package. \item Be sure to load the |fontspec| package,\footnote{If the \pkg{selnolig} package is run under \LuaLaTeX\ but the \pkg{fontspec} package isn't loaded by the time the \Verb+\begin{document}+ statement is encountered, \pkg{selnolig} will terminate with an error message.} and use \cmmd{setmainfont}, \cmmd{setsansfont}, and related commands to load the fonts you wish to use. \end{enumerate} \endgroup Depending on your \TeX\ distribution, the default font family used by \LuaLaTeX\ will be either Computer Modern or Latin Modern. (This is true of pdf\LaTeX\ as well, of course.) 
If you wish to use a different font family, issuing some font-related instructions will be required. \emph{How} to specify fonts and font families and set up various font-related options in \LuaLaTeX\ are topics that go far beyond the scope of this user guide. I urge you to become familiar with the very well-written \href{http://www.ctan.org/tex-archive/macros/latex/contrib/fontspec/fontspec.pdf}{user guide} of the \pkg{fontspec} package. You will also need to use a \TeX\ distribution that features a fairly recent version of \LuaLaTeX. \TeX Live 2013, \TeX Live\,2012, and MiK\TeX\,2.9 satisfy this requirement; versions of \TeX Live before 2011 probably do not. If you use a command-line interface to compile a document named, say, |myfile.tex|, type \begin{Verbatim} lualatex myfile \end{Verbatim} rather than either |latex myfile| or |pdflatex myfile| to initiate compilation. If you use a text editing program with pull-down menus or buttons to invoke a suitable compiler, be sure to select |LuaLaTeX|. The very first time one runs \LuaLaTeX\ on a document with a new set of fonts, the compilation speed will likely be quite slow because \LuaLaTeX\ (actually, a package loaded by \LuaLaTeX) has to create various cache files to store font-related information. Subsequent compilation runs should be much faster. The answers to the questions entitled \href{http://tex.stackexchange.com/q/28642/5001}{Frequently loaded packages: Differences between pdf\LaTeX\ and \LuaLaTeX?} and \href{http://tex.stackexchange.com/q/32295/5001}{Using Lua\TeX\ as a replacement for pdf\TeX}, both posted to \href{http://tex.stackexchange.com/}{tex.stackexchange.com}, provide lots of very useful information for people who are new to \LuaLaTeX\ and are at least somewhat familiar with pdf\LaTeX. Another great resource for people who wish to become more familiar with \LuaLaTeX\ is \href{http://mirror.ctan.org/info/luatex/lualatex-doc/lualatex-doc.pdf}{A Guide to \LuaLaTeX} by Manuel Pégourié-Gonnard. 
\subsection{Anything else I need to do or know?} \label{sec:anythingelse} For multilingual support, \LuaLaTeX\ and the \pkg{selnolig} package work well with the \href{http://www.ctan.org/pkg/babel}{\pkg{babel}} package. If you use the \pkg{babel} package, be sure to load \pkg{selnolig} \emph{after} \pkg{babel}; that way, the supplemental hyphenation patterns provided by the \pkg{selnolig} package won't get clobbered by \pkg{babel}'s hyphenation settings.\footnote{The \pkg{selnolig} package is also compatible with the \href{http://www.ctan.org/pkg/hyphsubst}{\pkg{hyphsubst}} package (which, if used, should be loaded with a \Verb+\RequirePackage+ statement \emph{before} the \Verb+\documentclass+ instruction). Since mid-2013, one can also use the \href{http://www.ctan.org/tex-archive/macros/latex/contrib/polyglossia}{\pkg{polyglossia}} package with \LuaLaTeX.} \LuaLaTeX\ natively supports the so-called \utf\ input encoding scheme. In fact, \utf\ is also the \emph{only} input encoding scheme that \LuaLaTeX\ knows about. Nowadays, many modern \TeX-aware editors support \utf\ directly; \LuaLaTeX\ and \pkg{selnolig} should have no problems with \TeX\ files produced by these editors. Older files, however, may employ input encoding schemes incompatible with \utf. If your input files currently use a different input encoding scheme, e.g., \textsc{latin1}, they need to be converted to \utf\ before \LuaLaTeX\ can process them properly. Several methods exist for changing a file's input encoding scheme. Please see the posting \href{http://tex.stackexchange.com/q/87406/5001}{How to change a .tex file's input encoding system (preferably to \utf)?} on \href{http://tex.stackexchange.com/}{tex.stackexchange.com} for several possible conversion methods. 
If your document is written in German, it is assumed that all vowels with diereses (Umlaute) are entered as \Verb|ä|, \Verb|ö|, \Verb|ü|, etc.\ rather than, say, as \Verb|\"{a}|, \Verb|\"{o}|, and~\Verb|\"{u}| or, if you tend to use the \pkg{babel} \enquote{shortcuts}, as \Verb|"a|, \Verb|"o|, and~\Verb|"u|. Likewise, it's assumed that you enter the \enquote{eszett} (\enquote{scharfes~s}) character as~|ß| rather than as~|\ss|.\footnote{\TeX nically speaking, |selnolig| requires the use of |ä|, |ö|, |ü|, and |ß| only in the search strings of the ligature suppression rules.} It is also assumed that you use the triple-f (modern) spelling of words such as \texttt{Schifffahrt},\footnote{The \pkg{selnolig} package's German language rules are set to recognize words containing the old-spelling version |schiffahrt|; the ff ligature is not broken up for these words. However, most other words that have two f's in the old spelling and three f's in the new spelling don't get any special treatment in the package.} \texttt{Stofffarbe}, and \texttt{grifffest} and the double-t (modern) spelling of words such as |Mannschafttest|. 
Finally, all \pkg{babel}-style ``\Verb+"|+'' ligature-suppressing shortcuts should either be removed entirely or be replaced with |\breaklig| instructions; the \pkg{selnolig} package's |\breaklig| macro is explained in \cref{sec:breaklig}.\footnote{On my \LuaLaTeX\ system, whenever a \Verb+"|+ command is encountered, I either get a bad crash that requires a reboot of the computer (under Mac\TeX\,2012) or I get a stern error message about \enquote{Forbidden control sequence found while scanning use of \cmmd{FancyVerbGetVerb}} (under Mac\TeX\,2013).} \section[The selnolig package's approach to breaking up ligatures]{The \pkg{selnolig} package's approach to breaking up ligatures} \label{sec:approach} \subsection[Free, derivational, and inflectional morphemes]{Free, derivational, and inflectional morphemes, and their relationship to ligature suppression} Good typography supports and enhances the readability of the typeset text. There are obviously a great many facets to how typography may contribute to good readability. One aspect is the ease with which readers can discern the \emph{meaning} of the typeset text and its constituent parts\textemdash words. Because a typographic ligature groups two or more characters into a composite glyph, it is natural for the reader to \enquote{read} a ligature as forming a single unit and, moreover, to associate the ligature with some part of the word's meaning. Whereas this mental association of visual unity and meaning can be helpful when it comes to discerning the meaning of single-morpheme words,\footnote{Some examples of single-morpheme words containing a ligature are off, fit, flat, office, baffle, left, act, cost, and spin.} it \emph{may} detract from the word's readability if the word is composite and the ligature happens to span a morpheme boundary. 
Ligatures that span morpheme boundaries \emph{may} impair a composite word's readability if their presence makes readers slow down and perform a \enquote{double take} in order to figure out which morphemes are used in the composite word. What exactly are morphemes? Briefly put, morphemes are the smallest linguistic units in a word that carry \emph{meaning}. Because words are, by definition, standalone units of text, each word contains at least one morpheme.\footnote{Please don't try to get me involved in a discussion of what it may mean to have words without meaning\dots} Morphemes are classified as \emph{free} if they can stand alone as words (e.g., cat, dog, sea, see), and as \emph{bound} if they cannot. E.g., the letter~|s| in the words cats, dogs, and rivers indicates the plural forms of the associated nouns; because the |s| particle cannot stand by itself as a word, it is a bound morpheme. Bound morphemes can be divided further into derivational and inflectional morphemes. A \emph{derivational} morpheme changes the meaning of the associated free morpheme in a fundamental way. E.g., the \enquote{un} in \enquote{untrue} serves to create a word with the opposite meaning of the free morpheme \enquote{true}, and the \enquote{ful} in \enquote{shelfful} indicates the word is a quantity measure (\enquote{two shelffuls of books on typography}, say). An \emph{inflectional} morpheme signifies a less fundamental change in meaning. In nouns (and, depending on the language, adjectives as well), inflectional morphemes can indicate plural forms (child vs. children, cat vs. cats) and other forms of declension.\footnote{In modern English, the virtually sole type of declension is to indicate plural status. In other languages, declension can also occur to indicate genitive, dative, accusative,~etc. forms of nouns \emph{and} adjectives. E.g., in German one says das schöne Kind, des schönen Kindes, dem schönen Kinde,~etc. 
The particles |en|/|es| and |en|/|e| represent inflectional morphemes associated with the genitive and dative forms of the nouns and adjectives.} In verbs, inflectional morphemes indicate conjugation, such as a change in tense of the verb. E.g., call vs. called, walk vs. walked, but also \enquote{swim} vs. \enquote{swims}, etc. Words containing more than one morpheme can consist either of \enquote{just} free morphemes\textemdash rooftop, newspaper, etc.\textemdash or of free and bound morphemes joined together\textemdash untrue, shelfful, childish, laughs, etc. Bound morphemes generally occur either as prefixes or suffixes to the word's \enquote{main part} or \enquote{stem} (the free morpheme). Prefixes almost invariably represent derivational morphemes (e.g., untrue, review, perform). Suffixes, in contrast, can consist of free, derivational, or inflectional morphemes. For instance, the suffixes |like| and |less| in dwarflike and leafless are free morphemes, whereas the suffix~|ed| in hounded and laughed is an inflectional morpheme. It is important to realize that not all ligatures that span morpheme boundaries are equally inimical to good readability. Consider, say, the word \emph{umbrellas}, which contains the ligature~\emph{as}. Note that this ligature spans the boundary between the free morpheme \emph{umbrella} and the suffix~\emph{s}. Nevertheless, I'm quite confident that very few will claim that the presence of the \emph{as} ligature detracts from the readability of the plural word \emph{umbrellas}. I believe there are two reasons why this particular word's readability is not impaired by the presence of a morpheme-spanning ligature. First, the suffix~\emph{s} is an inflectional morpheme: it \enquote{merely} serves to change the noun's state from singular to plural; clearly, most of the composite word's meaning is conveyed by the free morpheme \emph{umbrella}. 
Second, the ligature occurs at the very \emph{end} of the word rather than, say, closer to the beginning or middle of the word; by the time the eye reaches the \emph{s} character, most of the word's meaning will already have been perceived. Because not all morpheme-spanning ligatures are equally problematic in terms of their impact on a composite word's readability, the \pkg{selnolig} package follows rules that leave some ligatures untouched, while others are broken up. The package adopts the following broad principles: First, ligatures that cross the boundaries of two free morphemes are always suppressed. Second, ligatures that cross the boundary between a free morpheme and a derivational morpheme are also suppressed\textemdash with certain exceptions that are explained below. Third, ligatures that span the boundary between a free morpheme and an inflectional morpheme are generally not suppressed. In \cref{sec:german-rules} below, the third principle is shown to be particularly relevant for decisions related to the (non)suppression of ft and fft ligatures in certain German texts. \subsection{Sidebar: Morpheme boundaries, syllable boundaries, and ligature suppression} Observe that morphemes need not coincide with \emph{syllables}, and hence that morpheme boundaries need not coincide with syllable boundaries and/or permissible hyphenation points. Indeed, words can contain several syllables but consist of only one morpheme (e.g., apple, orange, banana), or they can contain only one syllable but consist of two or more morphemes. E.g., the words \enquote{cats} and \enquote{dogs} each contain two morphemes, and the single-syllable word \enquote{twelfths} contains three morphemes (the free morpheme |twelve|, the derivational morpheme~|th|, and the inflectional morpheme~|s|). The fact that a ligature may span a syllable boundary in no way implies that the ligature should be suppressed.
Consider, for instance, the German words Affe, Griffel, Kaffee, Koffer, Löffel, Muffel, and Schiffe: All feature a syllable boundary and hyphenation point between the two |f|'s. Nevertheless, none of the ff ligatures need be broken up, because the |ff| character pair doesn't span a morpheme boundary in any of these words. Or, consider the following German words that feature ft ligatures: bekräftigen, duftend, haften, heftig, Lüftung, and vergiftet. The ft ligatures are not suppressed because the |ft| pairs don't span morpheme boundaries. Should \TeX\ need to hyphenate some of the words listed in the preceding paragraph to generate a well-typeset paragraph, it can of course do so\textemdash and break up the ff and ft ligatures in the process. There's no need, though, to break up a ligature just because hyphenation \emph{might} occur at that point. As always, there's no meaningful rule without at least one exception; in \enquote{Interlude~I} in \cref{sec:german-rules} below, I discuss what \emph{Duden} calls ambiguous cases for which ligature suppression follows syllable boundaries. To further highlight the critical importance of morpheme boundaries for matters of ligature suppression, consider the single-syllable words fifths and twelfth. The ft ligature is broken up for these words because it spans a boundary between a free morpheme (five and twelve, respectively) and a derivational morpheme~(th). The fact that there's no syllable boundary is immaterial.
\subsection[selnolig's ligature suppression rules: English language case]{\pkg{selnolig}'s ligature suppression rules: English language case} \label{sec:english-rules} Typographic ligatures are suppressed if the following conditions apply to a word: \begin{itemize} \item if two free morphemes are joined: \uselig{halfline} $\to$ halfline, \uselig{halflife} $\to$ halflife, \uselig{cufflink} $\to$ cufflink, \uselig{halftone} $\to$ halftone, %\uselig{wolftrap} $\to$ wolftrap, \uselig{pastime} $\to$ pastime, \uselig{houndstooth} $\to$ houndstooth, \uselig{Charlestown} $\to$ Charlestown, \uselig{painstaking} $\to$ painstaking, \uselig{arctangent} $\to$ arctangent, \uselig{passport} $\to$ passport, \uselig{newspaper} $\to$ newspaper, \uselig{Hyannisport} $\to$ Hyannisport, \uselig{clothespin} $\to$ clothespin, \emph{\uselig{seastrand}} $\to$ \emph{seastrand}, \emph{\ebg \uselig{Catskills} $\to$ Catskills}, \emph{\ebg \uselig{Peekskill} $\to$ Peekskill},% \footnote{The relevant morpheme in the last two words in this list is |kill| (from an old Dutch word for stream), not |skill|.} etc. \item if a prefix and main word are joined: \uselig{offload} $\to$ offload, \uselig{mistake} $\to$ mistake, \uselig{mistrust}~$\to$ mistrust, \uselig{displease}~$\to$ displease, \uselig{suspend}~$\to$ suspend, \emph{\uselig{asea} $\to$ asea}, \emph{\uselig{ultrasound} $\to$ ultrasound}, etc. Note: If the main word, etymologically speaking, starts with |sp| or~|st|, the sp and st ligatures \emph{are} used even if the prefix ends in~|s|: disperse, dispirit, distant, distill, distress, etc. \item if a main word is followed by a suffix beginning with~|f| or~|l| other than |ly|: \uselig{shelfful}~$\to$ shelfful, \uselig{leafless} $\to$ leafless, \uselig{dwarflike}~$\to$ dwarflike, \uselig{leaflet}~$\to$ leaflet, \emph{\uselig{soulless} $\to$ soulless}, \emph{\uselig{seallike} $\to$ seallike}, etc. 
\enlargethispage{1.5\baselineskip} Note that the suffixes used above\textemdash |ful[l]|, |less|, |let|, and |like|\textemdash are all free morphemes. In contrast, the short suffix |ly|, if used to make adjectives into adverbs, is a derivational morpheme. The fl ligature is thus \emph{not} broken up for words such as \uselig{briefly} and \uselig{chiefly} (unless the |broadf| option is set; see below). \item If the main word ends with an~|f| and the suffix starts with an~|i|, the fi and ffi ligatures are \emph{not suppressed} (unless, again, the |broadf| option is set). Examples: \uselig{elfin}, \uselig{selfish}, \uselig{fluffily}. \item The ft ligature is also suppressed for words that end in |fth| or |fths|: \uselig{fifth}~$\to$ fifth, \uselig{twelfths}~$\to$ twelfths. Note that the particle |th| contained in these words is a derivational morpheme. \end{itemize} If the \opt{broadf} package option is set\textemdash as is the case for this user guide; after all, it's written to demonstrate the package's capabilities\textemdash the \pkg{selnolig} package will also suppress \begin{itemize} \item fi and ffi ligatures if the main word ends in~|f| and the suffix starts with an~|i|: elfin, selfish, golfing, surfing, beefier, fluffily, fluffiness, goofiness, standoffish, jiffies, buffiest,~etc.; \item fl and ffl ligatures in adverbs ending in |fly| and |ffly|, such as chiefly, briefly, and gruffly; and \item ft ligatures in words such as fifty and fiftieth. \end{itemize} The option \opt{broadf} is \emph{not} enabled by default. This is because I believe that any gains in readability that might result from breaking up the f-ligatures caught by the |broadf| rules are likely to be minor and aren't worth running the serious risk of creating unsightly visual clashes caused by unligated~f\kern0pt i, ff\kern0pt i, f\kern0pt l, and ff\kern0pt l glyphs. 
If the package's \opt{hdlig} option is set, an additional ligature-suppressing principle is activated: \begin{itemize} \item The~st and~sp ligatures are also suppressed for words with Greek roots that contain the character triples~|sth| and~|sph|; examples: isthmus and atmosphere. Typesetting these words as \uselig{isthmus} and \uselig{atmosphere}, i.e., with the very distinctive looking st and sp ligatures, risks reducing their readability, because doing so would obscure the presence of the \opt{th} and \opt{ph} character pairs which derive from single Greek letters~$\theta$/$\vartheta$ and $\phi$/$\varphi$, respectively. For these words, then, it seems advisable to suppress the st and sp ligatures even though, strictly speaking, no morpheme-crossing issues are involved. \end{itemize} In addition, as is explained in more detail in \cref{sec:global-nolig}, the ligatures {\ebg \uselig{fb}, \uselig{fh}, \uselig{fj}}, and {\ebg \uselig{fk}} are suppressed \emph{globally} for English language documents. This is done because there seem to be no words of \emph{English} origin for which these ligatures do not span a morpheme boundary. However, these ligatures are not suppressed for certain words of non-English origin, such as {\ebg Kafka, fjord, and fjell}. \subsection[selnolig's ligature suppression rules: German language case]{Ligature suppression rules: German language case} \label{sec:german-rules} For German words, the following rules apply when it comes to deciding which ligatures to break up and which ones to permit. These rules are built mainly from statements found in the \emph{Duden} and various websites that have taken an interest in this subject\textemdash with adaptations for the ft and fft ligatures. \begin{itemize} \item Case 1: Joining of two free morphemes: Ligatures are suppressed. 
Examples: \uselig{Schilfinsel} $\to$ Schilfinsel, %\uselig{Zupfinstrument} $\to$ Zupfinstrument, \uselig{Baus\kern0pt toffingenieur} $\to$ Baustoffingenieur, \uselig{Wassers\kern0pt toffionen} $\to$ Wasserstoffionen; \uselig{Impffurcht} $\to$ Impffurcht, \uselig{Senffabrik} $\to$ Senffabrik, \uselig{Ablauflogik} $\to$ Ablauflogik, \uselig{Schorfflecken} $\to$ Schorfflecken; \uselig{Zwölffingerdarm} $\to$ Zwölffingerdarm; \uselig{Brieftaube} $\to$ Brieftaube, \uselig{elfteilig} $\to$ elfteilig, %\uselig{Stofftasche} $\to$ Stofftasche, etc. \item Case 2: Joining of a prefix (whether a free or a derivational morpheme) ending in~|f| and a main word (free morpheme) starting with~|b|, |f|, |h|, |i|, |j|, |k|, |l|, or~|t|: Ligatures are suppressed. By far the most common prefix that gives rise to the need to suppress various f-ligatures at the junction of a prefix and main word is the word \enquote{auf}, as in aufbrechen, auffassen, Aufführung, auffliegen, auffischen, aufhören, aufisst, aufjaulen, aufklingen, Auflage, Auftrag, auftreten, etc. \item Case 3: Joining of a main word (free morpheme) ending in \enquote{f} or \enquote{ff} and a suffix (either a derivational or an inflectional morpheme) starting with \enquote{f}, \enquote{i}, \enquote{l}, or~\enquote{t}. \begin{itemize} \item Case 3a: Suffixes (bound morphemes) that start with an \enquote{f}, e.g., |-fach| and |-faltig|: The ff-ligature is suppressed. Examples: fünffach and zwölffaltig. \item Case 3b: Suffixes (bound morphemes) that start with an \enquote{i}, e.g., |-ig|, |-in|, and |-isch|: The~fi and~ffi ligatures are \emph{not} suppressed. Examples: streifig, äffisch, Chefin, Chefinnen. I haven't found a clear justification for this rule so far. 
I assume the rule is there because unligated f\breaklig i and ff\breaklig i character pairs are potentially sufficiently unsightly to make them stand out as an infraction against good typography that's even more grievous than having fi and ffi ligatures that span the boundary between a main word and a suffix. \item Case 3c: Suffixes that start with an \enquote{l}, e.g., |-lich|, |-ling|, |-lein| and |-los|: The fl-ligature is suppressed. Example words: trefflich, höflich, Prüfling, Köpflein, and s\breaklig traflos. \item Interlude~I: In ambiguous cases that could give rise to fl-ligatures that involve certain suffixes, according to \emph{Duden} preference should be given to \enquote{how the syllables are pronounced and how a word would be hyphenated}. For instance, \emph{Duden} says that the fl-ligature should be suppressed in the words Verzweiflung, Bezweifler, schweflig, and würflig.\footnote{% Note that the real suffixes in these words are |ung|, |er|, and~|ig|\textemdash not |lung|, |ler|, and~|lig|. Justifying the suppression of the fl-ligature for these words is thus not a simple matter of not letting a ligature span the \enquote{gap} between a main word and suffix. In my opinion, the rationale generally given for suppressing the fl-ligature in these cases\textemdash reliance on how the syllables are divided and how the composite words are hyphenated\textemdash is not entirely satisfactory. This is because, morphologically speaking, the main words Schwefel, Würfel, and Zweifel each contain two morphemes: a stem and the \emph{derivational} morpheme~|el|: \Verb+Schwef|el+, \Verb+Würf|el+, and \Verb+Zweif|el+. It is therefore not necessary, in my opinion, to create a new rule to justify the (non-)use of the fl-ligature for these cases. 
Given the presence of two morphemes in each of the main words, one could simply rely on the general rule of not letting ligatures span morpheme boundaries within the main words to motivate the suppression of the fl-ligature for words such as schweflig, würfle, and Verzweiflung, as their morphological components are \Verb+schwef|[e]l|ig+, \Verb+würf|[e]l|e+, and \Verb+Ver|zweif|[e]l|ung+. It's fair to say, though, that most modern German speakers are only vaguely aware of the fact that the~|el| particle constitutes a morpheme in words such as Würfel and Griffel\textemdash and that they are probably entirely unaware of this fact for words such as Schwefel and Zweifel. To be sure, the typographical convention adduced by \emph{Duden} for suppressing the fl-ligature in words such as schweflig, Verzweiflung, and würfle does \emph{not} appear to be based on the fact that letters~|f| and~|l| in these words belong to different morphemes.} This convention may also be applied to justify the non-use of the fl-ligature in words such as knifflig and mufflig as well as in the present-tense\slash first-person-singular forms of the verbs büffeln, löffeln, schaufeln, stiefeln, verteufeln, and zweifeln: these forms are typeset \emph{without} the fl/ffl-ligature, i.e., as büffle, löffle, schaufle, stiefle, verteufle, and zweifle, respectively. \item Interlude~II: If a word \emph{ends} with an |fl| character pair because an abbreviation is in effect, \emph{Duden} says it's OK to use the fl-ligature even if the~f and~l characters belong to different morphemes. E.g., in the abbreviation \enquote{Aufl.}, the fl-ligature \emph{is} employed even though the ligature should not be used for the full, unabbreviated form of the word (\emph{viz.}, Auflage).
Although not mentioned explicitly by \emph{Duden}, I believe the convention mentioned in the preceding paragraph may be extended to justify the use of the ff-ligature in the abbreviated word \enquote{Auff.} (full form: Aufführung\textemdash no ff ligature) and of the ft-ligature in \enquote{Auft.} (full form: Auftrag\textemdash no ft ligature). This convention further suggests (implies?) that it's permissible (a)~to use the ff ligature in surnames that end in |ff|, such as Orff and Hausdorff, and (b)~to use the ffi- and ffl-ligatures in abbreviated names such as Steffi and Steffl. \item Case 3d: Suffixes (derivational or inflectional morphemes) starting with~|t|. Unfortunately, not much official wisdom seems to exist to guide this case, possibly because the~ft and~fft ligatures are not (yet?) used as widely as are the other f-ligatures. The following four rules, and especially the second one, should therefore be understood to be somewhat provisional. \begin{itemize} \item The convention mentioned in \enquote{Interlude II} above, about not breaking up an fl-ligature if it occurs at the very end of a word (as in ``Aufl.''), may be extended to apply to the case of~ft and~fft ligatures as well, i.e., they are not suppressed \emph{if} they occur at the very ends of words (or word fragments that have separate meaning), as in verschärft, gestreift, gerafft, Dahingerafftsein, unbedarft, and Unbedarftheit. Note that the ft and fft ligatures span a morpheme boundary in these cases: the single-letter second morpheme, the letter~|t|, is an \emph{inflectional} morpheme that indicates a form of conjugation of the associated verb (viz., past tense and/or past participle). \item Should ft and fft ligatures be broken up in past tense and past-participle forms of verbs that do not end in~ft but, instead, in -fte, -ften, -ftes, -ftest, etc? Example words: |streifte|, |rafften|, and |schlürftest|. 
Because these suffixes are \enquote{merely} \emph{inflectional} rather than derivational morphemes, the \pkg{selnolig} package does \emph{not} break up the~ft and~fft ligatures in these cases either. Thus, the words will be typeset as streifte, schlürftes\kern0pt t, and rafften rather than as streif\breaklig te, schlürf\breaklig tes\kern0pt t, and raff\breaklig ten.\footnote{I haven't yet come across any kind of authoritative discussion of this specific case. Should someone be able to furnish a good counterargument to the setting proposed here, I would be happy to change the settings accordingly.} \enlargethispage{1.2\baselineskip} \item Again appealing to the convention mentioned in \enquote{Interlude~II}, it would also seem OK to use the ft-ligature in expressions such as \enquote{zu fünft} and \enquote{die zwölftschnellste Sprinterin Bayerns}: Even though the~|t| at the end of |fünft| and |zwölft| is a \emph{derivational} morpheme, the ft ligature also occurs at the very end of the word or word fragment. In the case of the word \enquote{zwölftschnellste}, the argument for keeping the ft~ligature may also be based, in part, on the observation that the entire fragment \enquote{zwölft} is a prefix to \enquote{schnellste}; grouping the |t| character visually to its stem, |zwölf|, via an ft-ligature surely helps to enhance the overall readability of the sixteen-character word zwölftschnellste, right? \item In contrast, the ft-ligature should \emph{not} be used in \enquote{Beethoven's Fünfte Sinfonie} and \enquote{zum elften Mal}. The argument for breaking up the ft-ligature in the words \enquote{Fünfte} and \enquote{elften} rests on the fact that the particles |te| and |ten| are derivational morphemes and that the ft ligatures are no longer at the very end of the word (or word fragment). The justification for breaking up the ft ligatures does not rest on the fact that the syllable boundaries (and hyphenation points) happen to fall between the letters~|f| and~|t|.
\end{itemize} \item Case 4: A free morpheme \emph{ends} in~|ft| (e.g., Saft, Kraft, Luft, Duft, Haft, and Vernunft) and is joined either to another free morpheme or to a suffix that's a bound morpheme. Example words: \opt{Saftladen}, \opt{Säfte}, \opt{Kraftfahrzeug}, \opt{Luftagentur}, \opt{duftend}, \opt{bekräftigen}, \opt{Haftung}, and \opt{vernünftig}. Because the~|ft| character pair doesn't cross a morpheme boundary, the \pkg{selnolig} package does \emph{not} break up the~ft ligature. Thus, the words are typeset as Saftladen, Säfte, Kraftfahrzeug, Luftagentur, duftend, bekräftigen, Haftung, and vernünftig. The fact that a \emph{syllable boundary} occurs between the letters~|f| and~|t| in all of these words should not affect the decision whether or not to employ the ft (or~fft) ligature. \end{itemize} \end{itemize} In addition, as is explained in more detail in \cref{sec:global-nolig}, the ligatures {\ebg \uselig{fb}, \uselig{fh}, \uselig{fj}}, and {\ebg\uselig{fk}} are suppressed \emph{globally} for German language documents. This is done because there seem to be no words of \emph{German} origin for which these ligatures do not span a morpheme boundary. However, these ligatures are not suppressed for selected words of non-German origin, such as {\ebg Kafka, Sognefjord, and Dovrefjell}. \section[Structure of the selnolig package]{Structure of the \pkg{selnolig} package} \label{sec:structure} \subsection{The main user commands} \label{sec:syntax} The four main user macros of the \pkg{selnolig} package are \cmmd{nolig}, \cmmd{keeplig}, \cmmd{uselig}, and \cmmd{breaklig}. The first two macros are meant to be used in the preamble to set up ligature-suppression rules on a document-wide basis. The latter two may be used, as needed, within the body of the document on an \emph{ad~hoc} or case-by-case basis to either supplement or override rules set up by \cmmd{nolig} and \cmmd{keeplig} instructions. The package provides four additional user commands. 
The instructions \cmmd{debugon} and \cmmd{debugoff}, described in more detail in \cref{sec:debugon}, serve to turn on and switch off logging of the activity of the \pkg{selnolig} package. The directives \cmmd{selnoligon} and \cmmd{selnoligoff}, described in \cref{sec:selnoligon}, turn on and switch off \pkg{selnolig}'s ligature-suppressing algorithms. \subsubsection{The \cmmd{nolig} macro} \label{sec:nolig} The package's main user macro is called \cmmd{nolig}. Each \cmmd{nolig} instruction, or rule, takes two arguments. The first is a search string, and the second is a string that contains one or more ``\Verb+|+'' characters to indicate where in the search string the non-ligation \enquote{whatsits} should be inserted. E.g., the instruction \begin{Verbatim} \nolig{lfful}{lf|ful} \end{Verbatim} sets up a rule to suppress the ff-ligature in words such as \enquote{shelfful}, \enquote{bookshelfful}, and \enquote{selffulfilling}.\footnote{\TeX nically speaking, the \cmmd{nolig} macros perform their job by inserting special \enquote{whatsits} into the input stream whenever a pattern match occurs. These whatsits prevent the paragraph-building algorithm from replacing the affected character pairs (or triples) with corresponding ligatures. The package's \cmmd{keeplig} macro, described below, works by removing any nonligation whatsits from the input stream whenever a pattern match occurs, thereby re-enabling the use of ligatures.} It is possible (and permissible) to have more than one \Verb+|+ character in the second argument of a \cmmd{nolig} instruction. For instance, one \emph{could} specify the rule \Verb+\nolig{Auflaufform}{Auf|lauf|form}+ to suppress both the fl- and the ff-ligature in the words Auflaufform and Auflaufformen.
For added flexibility, though, the \pkg{selnolig} package's German language rules actually use separate \cmmd{nolig} rules to suppress the ff and fl ligatures in this word; see \cref{sec:debugon} for the precise format of the rules that affect the word Auflaufform. It is also possible to use Lua-style wildcard characters in the search string, as long as the wildcard characters occur \emph{after} the non-ligation point. For example, the file \pkg{selnolig-german-patterns.sty} sets up the rules \begin{Verbatim} \nolig{Dorff[aäeiloöruü]}{Dorf|f} \nolig{dorff[aäeiloöruü]}{dorf|f} \end{Verbatim} to search for words that contain the strings \opt{Dorff} and \opt{dorff} followed by a letter in the set |aäeiloöruü|.\footnote{In case you're curious: These particular, somewhat complicated-looking search strings are used to avoid suppressing the ff-ligatures in words that \emph{end} in either \opt{Dorff} or \opt{dorff}, as would be the case with the surnames Dorff and Hausdorff.} Incidentally, it is not strictly necessary, in the second argument of the \cmmd{nolig} command, to provide any material \emph{after} the vertical bar that indicates the non-ligation point. However, the readability of your \cmmd{nolig} rules may suffer if you don't list that material. If you examine the \cmmd{nolig} rules provided in the files \pkg{selnolig-german-patterns.sty} and \pkg{selnolig-english-patterns.sty}, you'll notice soon that there's some redundancy built in, in the sense that some words' ligatures will be broken up by more than one rule. For instance, the need to suppress the ff-ligature in \enquote{auffallen} happens to be met by both \Verb+\nolig{auff}{auf|f}+ and \Verb+\nolig{ffall}{f|fall}+. This redundancy is deliberate, because not all words that might fit one pattern will also fit the other. Providing some redundancy of this type seems like a reasonable way to proceed.
As with all \LaTeX\ instructions, the arguments of \cmmd{nolig}, \cmmd{keeplig}, and \cmmd{uselig} commands are case-sensitive. \subsubsection{The \cmmd{keeplig} macro} \label{sec:keeplig} The macro |\keeplig{<string>}| allows users to create rules that override \cmmd{nolig} rules selectively: for words that contain fragment |<string>|, the corresponding \cmmd{nolig} rule will not be executed. For a \cmmd{keeplig} rule to work properly, then, the command's argument must be a string that includes \emph{as a subset} a string treated by one or more \cmmd{nolig} rules. The \cmmd{keeplig} macro is a very useful tool because it permits devising a (much) smaller set of broader, i.e., less restrictive, \cmmd{nolig} rules; any Type-II errors that may arise from having \cmmd{nolig} rules whose scope is too broad can be undone by providing judiciously crafted \cmmd{keeplig} rules.\footnote{In the present context, a Type-II error is the suppression of a ligature that is, in fact, valid for the word at hand. } Consider the following example: If the \opt{ngerman} language option is set, the \pkg{selnolig} package uses the rule \begin{Verbatim} \nolig{flich}{f|lich} \end{Verbatim} to break up the fl-ligature in a multitude of words that end in the suffix |lich| (a derivational morpheme): begrifflich, beruflich, brieflich, glimpflich, hilflich, höflich, käuflich, s\breaklig träflich, tariflich, trefflich, unerschöpflich, and verwerflich, to name but a few. This \cmmd{nolig} rule, incidentally, also (correctly) catches the word \enquote{Lauflicht}, which contains the free morphemes |Lauf| and |licht|. However, the scope of this \cmmd{nolig} rule is a bit too broad (or, if you will, it is insufficiently restrictive) because it also catches certain words, such as \opt{Pflicht} and \opt{verpflichten}, for which the fl-ligature should \emph{not} be suppressed.
Rather than provide a large number of more restrictive \cmmd{nolig} rules aimed at avoiding catching the Pflicht- and pflicht-words, the package provides the simple command \begin{Verbatim} \keeplig{flicht} \end{Verbatim} This rule tells \pkg{selnolig} to override the action of the \Verb+\nolig{flich}{f|lich}+ rule for all words that contain the string \opt{flicht}. Most words affected by this \cmmd{keeplig} rule happen to contain the strings \enquote{Pflicht} and \enquote{pflicht}. In addition, this rule also helps preserve the fl-ligature in words such as \enquote{entflicht} and \enquote{verflicht} (the third-person-singular forms of the verbs entflechten and verflechten, respectively). %%%%flichst verflichst entflichst It is important to be aware of the following fact: It is \emph{not necessarily} the case that ligatures contained in the argument of a \cmmd{keeplig} rule will be used in words that contain the rule's search string. Why?! It is because, as was noted above, \emph{more than one} \cmmd{nolig} rule can apply to a given word. Consider, for instance, the word \opt{Lauflicht} mentioned earlier. This word happens to be caught by two \cmmd{nolig} rules and one \cmmd{keeplig} rule provided in the file \pkg{selnolig-german-patterns.sty}: \begin{Verbatim} \nolig{aufl}{auf|l} \nolig{flich}{f|lich} \keeplig{flicht} \end{Verbatim} For the word |Lauflicht|, \Verb+\keeplig{flicht}+ serves to undo the action of \Verb+\nolig{flich}{f|lich}+. However, because the string |aufl| is \emph{not} a subset of the string |flicht|, |\keeplig{flicht}| does not undo the action of \Verb+\nolig{aufl}{auf|l}+. Hence, the word |Lauflicht| ends up being typeset\textemdash correctly!\textemdash as Lauflicht, i.e., \emph{without} the fl-ligature. 
Interestingly, the rule |\keeplig{flicht}| is itself a bit too broad because it improperly catches the composite noun |Sumpflicht|, for which the fl-ligature \emph{should} in fact be suppressed.% %\footnote{The free morphemes contained in the word |Sumpflicht| are |Sumpf| and |licht|\textemdash \emph{not} |Sum| and |pflicht|!} To address this case, the file \pkg{selnolig-german-patterns.sty} provides the rule \Verb+\nolig{Sumpfl}{Sumpf|l}+; for the word Sumpflicht, this \cmmd{nolig} rule is not overridden by the rule |\keeplig{flicht}|. This \cmmd{nolig} rule also serves to suppress the fl ligature in words such as Sumpflabkraut and Sumpfleiche. \subsubsection{The \cmmd{uselig} macro} \label{sec:uselig} The \pkg{selnolig} package also provides the user command \cmmd{uselig}, which acts very much like the \cmmd{keeplig} command to override the action of a \cmmd{nolig} rule. However, it does so purely on a one-off basis. E.g., the command \Verb+\uselig{fj}+ will typeset \enquote{\uselig{fj}} even if the rule \Verb+\nolig{fj}{f|j}+\textemdash which suppresses the |fj| ligature on a global, i.e., document-wide basis\textemdash is active; \emph{without} \cmmd{uselig}, you'd get \enquote{fj}. You should use \cmmd{uselig} instructions only for single words and word fragments; don't use them for longer stretches of text. If you need to suspend the operation of the ligature suppression macros for longer stretches, including entire paragraphs or more, you should use the macros \cmmd{selnoligoff} and \cmmd{selnoligon}, which are described in more detail in \cref{sec:selnoligon}. \subsubsection{The \cmmd{breaklig} macro} \label{sec:breaklig} The macro \cmmd{breaklig}, which doesn't take an argument, is provided as a hopefully easy-to-remember substitute for the lower-level \LaTeX\ command ``\Verb+\-\hspace{0pt}+''. You should insert this macro in places where you want to break up a ligature on an ad-hoc basis and also wish to permit hyphenation. 
To suppress a ligature on an ad-hoc basis \emph{without} introducing a potential hyphenation point, insert the instruction \enquote{\cmmd{kern0pt}}. For instance, to suppress the {\ebg\emph{sk}} ligature in the word \Verb+groundskeeper+ on a one-off basis, one might enter it as \enquote{\Verb+grounds\textbackslash breaklig keeper+} in order to obtain \emph{\ebg groundskeeper} rather than \emph{\ebg \uselig{groundskeeper}}. To suppress the {\ebg \emph{sk}} ligature for this word as well as for words such as \emph{\ebg greenskeeper} and \emph{\ebg miskeep} throughout the entire document, one could issue the directive \Verb+\nolig{skeep}{s|keep}+; the package provides just such a rule. \subsection[Components of the selnolig package]{Components of the \pkg{selnolig} package} \label{sec:components} The \pkg{selnolig} package has the following components: \begin{itemize} \item The main \enquote{driver} file is called \pkg{selnolig.sty}. It sets up the package's main user macros, \cmmd{nolig}, \cmmd{keeplig}, \cmmd{uselig}, and \cmmd{breaklig}, that were explained in detail in the preceding subsection and loads several other files. \item The package's Lua code is in the file \pkg{selnolig.lua}. \item The ligature suppression rules for English and German language documents are contained in the files \pkg{selnolig-english-patterns.sty} and \pkg{selnolig-german-patterns.sty}, respectively. \item Supplemental hyphenation exception patterns, mostly for composite words that involve ligatures that are suppressed by the package's \cmmd{nolig} rules, are contained in the files \pkg{selnolig-english-hyphex.sty} and \pkg{selnolig-german-hyphex.sty}. \item The user guide\textemdash the document you're reading right now\textemdash is provided in the file \pkg{selnolig.pdf}; the associated source code is in the file \pkg{selnolig.tex}.
\item Ancillary files: the files \pkg{selnolig-english-test.tex} and \pkg{selnolig-german-test.tex} load the \pkg{selnolig} package as well as either \pkg{selnolig-english-wordlist.tex} or \pkg{selnolig-german-wordlist.tex}. They serve to demonstrate the output of the \pkg{selnolig} package when run on lists of English or German words that are candidates for non-use of ligatures. The files \pkg{selnolig-english-test.pdf} and \pkg{selnolig-german-test.pdf} contain the results of compiling the test programs. Assuming your \TeX\ distribution is either \TeX Live or MiK\TeX, you can access these files by typing \Verb+texdoc selnolig-english-test+ or \Verb+texdoc selnolig-german-test+ at a command prompt. \end{itemize} The \enquote{driver} file \pkg{selnolig.sty} starts by setting up several Boolean switches to structure the processing of options. It then loads the file \pkg{selnolig.lua}, which contains the package's lua code and sets up the user macros discussed in the preceding subsection. The remaining steps in the startup process depend on which language-related options were selected: \begin{itemize} \item If \emph{no} language-specific options are in effect, the setup process terminates. Users may, of course, provide their own \cmmd{nolig}, \cmmd{keeplig}, \cmmd{uselig}, and \cmmd{breaklig} instructions. \item If the \opt{english} option (or one of its synonymous options) is set, the files \pkg{selnolig-english-patterns.sty} and \pkg{selnolig-english-hyphex.sty} are loaded. The former file contains a detailed list of \cmmd{nolig} and \cmmd{keeplig} rules adapted to English language typographic usage; \Cref{sec:eng-listing} provides a complete listing of these rules. The latter file contains a list of hyphenation exceptions, mainly for words that contain one or more potential non-ligation points and for which \TeX's hyphenation algorithm either misses valid hyphenation points or selects invalid hyphenation points; see \cref{sec:addlhyph} below.
\item If the \opt{ngerman} option (or one of its synonymous options) is set, the files \pkg{selnolig-german-patterns.sty} and \pkg{selnolig-german-hyphex.sty} are loaded. The former file contains ligature suppression rules appropriate for German typographic usage; \cref{sec:germ-listing} lists its contents. The latter file provides additional hyphenation rules for German-language words. \item If the user specifies both the \opt{english} and \opt{ngerman} options (or some of their synonymous options), \emph{both} language-specific style files will be loaded. Under normal circumstances, a user will probably want to load only one or the other set of language-specific files, but not both. \end{itemize} The following is an admittedly rough and rather imperfect indication of just how much more complex the task is of creating a set of ligature suppression rules for German than it is for English: As of the mid-May 2013 version of the \pkg{selnolig} package, the \enquote{basic} set of English language ligature suppression rules for \enquote{common} f-ligatures consists of 32 \cmmd{nolig} and 17 \cmmd{keeplig} directives.\footnote{Including the rules that are activated if the |broadf| and |hdlig| options are both activated, the tally rises to about 420 \cmmd{nolig} and 52 \cmmd{keeplig} instructions.} In contrast, the set of German language ligature suppression rules for \enquote{common} f-ligatures consists of roughly 700 \cmmd{nolig} and~335 \cmmd{keeplig} directives. A ratio of roughly 1:20 in terms of detail and complexity! \section{Additional ligature-related matters} \label{sec:options} %\subsection{The package's main language options} % %The \pkg{selnolig} package currently offers two main language-specific options: %\begin{itemize} %\item \opt{english}; synonyms: \opt{british}, \opt{ukenglish}, \opt{UKenglish}, \opt{amer\-ican}, \opt{usenglish}, \opt{USenglish}, \opt{cana\-dian}, \opt{australian}, and \opt{new\-zealand}. 
%\item \opt{ngerman}; synonyms: \opt{german}, \opt{austrian}, \opt{naustrian}, \opt{swiss}, and \opt{swiss\-german}. %\end{itemize} %These language options may be used either individually or jointly. Indeed, this user guide was compiled with both the \opt{english} and \opt{ngerman} options set. % %The English and German ligature suppression rules are listed in \cref{sec:eng-listing,sec:germ-listing}, respectively. % \subsection{The \opt{noftligs} option} By default, the \pkg{selnolig} package will load rules to suppress ft and fft ligatures selectively, for both English and German documents. In case you want to suppress these two ligatures \emph{globally} rather than selectively, you could specify the option \opt{noftligs} when loading the package. Doing so will make the package set up the simple rule \Verb+\nolig{ft}{f|t}+ rather than load many separate rules for suppressing ft ligatures selectively.\footnote{For German language documents in particular, suppressing ft and fft ligatures \emph{globally} may help speed up compilation noticeably, as there will be roughly 180 \cmmd{nolig} and 60 \cmmd{keeplig} instructions fewer to run on the document.} You may also wish to specify the \opt{noftligs} option if the font you use in your document doesn't even feature ft and fft ligatures. \subsection[English language case: The broadf and hdlig options]{English language case: The \opt{broadf} and \opt{hdlig} options} \label{sec:eng-opt} The ligature suppression patterns for English language words, contained in the file \pkg{selnolig-english-patterns.sty} and listed in \cref{sec:eng-listing} below, are grouped into four parts. The first two parts concern the suppression of f-ligatures. Part~1 provides a fairly limited, or \enquote{basic}, set of patterns that will always be executed, and Part~2 contains a broader set of ligation suppression rules that will be executed if the \opt{broadf} option is specified. 
As noted in \cref{sec:english-rules} above, for English-language documents only the fairly limited number of f-ligature suppression rules contained in Part~1 of the file is enabled by default. This is done because eliminating the morpheme-crossing f-ligatures caught if the |broadf| option is set does not appear to be a major concern in English-language typography. There simply doesn't appear to be a need to suppress the~fi (ffi) ligature in words that end in~f (ff\,) followed by the particles -ing, -ish, -ier, -iest, -ily, and -iness. Any gain in readability resulting from suppressing these fi and fl ligatures would appear to be more than offset by unsightly visual clashes created by unligated f\kern0pti, ff\kern0pti, f\kern0ptl, and ff\kern0ptl combinations. Part 3 of the file \pkg{selnolig-english-patterns.sty}, which is enabled if the \opt{hdlig} option is set, provides ligature suppression rules for the ct, st, and sp ligatures. Examples are words such as arctangent (not: \uselig{arctangent}), painstaking (not: \uselig{painstaking}), mistake (not: \uselig{mistake}), and trespass (not: \uselig{trespass}). Setting the \opt{hdlig} option also enables ligature suppression rules for additional discretionary ligatures such as \emph{th}, \emph{at}, and~\emph{et}. These ligatures might be deemed inappropriate for use in words such as \emph{\uselig{lighthouse}, \uselig{pothole}, \uselig{aromatherapy}, \uselig{albatross}, \uselig{ninety}, \emph{and} \uselig{nonetheless}}. With the \opt{hdlig} option set, these words will be typeset as \emph{lighthouse}, \emph{pothole}, \emph{aromatherapy}, \emph{albatross}, \emph{ninety}, and \emph{nonetheless}. Ligature suppression rules are provided for the following discretionary ligatures, which occur only in the \emph{italic} font shape of the font families used in this document: \emph{th}, \emph{at}, \emph{et}, \emph{as}, \emph{is}, \emph{us}, \emph{ll}, \emph{fr}, and {\ebg \emph{sk}}.
Part~3 of \cref{sec:eng-listing} lists these rules. Part 4 of the file \pkg{selnolig-english-patterns.sty}, which is also processed if the \opt{hdlig} option is set, deals with cases where one discretionary typographic ligature, say \emph{as}, might pre-empt the use of a more appropriate but trailing typographic ligature, say \emph{st} or~\emph{sp}, in words such as \emph{f\uselig{as}t}~$\to$ \emph{fast} and \emph{cl\uselig{as}p}~$\to$ \emph{clasp}. Note that the issue being addressed in this part is not that of a ligature improperly spanning a morpheme boundary; instead, it is the possibility that \TeX\ might pre-empt one typographic ligature with another ligature within one and the same morpheme. This issue is discussed in more detail in \cref{sec:preempt} below. \subsection{Composite words with ambiguous morphology} Some composite words can be made up of two different morpheme pairs, or even morpheme triples. For instance, the German words \opt{Saufladen} and \opt{Wachstube} may be constructed as \opt{Sauf-laden}\slash \opt{Sau-fladen} and as \opt{Wachs-tube}\slash \opt{Wach-stube}, respectively. In one case, using the fl and st ligatures would be wrong; in the other, using the ligatures helps indicate the intended meaning of the composite words. For words such as these, software isn't smart enough to \enquote{discern} which possible meaning is intended.\footnote{If the \opt{ngerman} option is set and the \pkg{babel} package is loaded as well, the \pkg{selnolig} package will break up the fl ligature in \opt{Saufladen} but not the st ligature in \opt{Wachstube}, i.e., the words will be typeset as \enquote{Saufladen} and \enquote{Wachstube}, respectively. If that's \emph{not} what you want, you'll need to mark up the words explicitly as follows: \Verb+\uselig{Saufladen}+ and \Verb+Wachs\breaklig tube+.} Writers, of course, could choose to insert explicit hyphen characters to indicate the intended meaning.
The preceding two examples each involve pairs of free morphemes. More complicated cases can occur too. For instance, the composite word \opt{Surftest} can have a meaning that involves a free morpheme and an inflectional morpheme (indicating the past-tense use of the verb), whereas the other meaning involves two free morphemes. Consider the questions \enquote{Surftest Du vergangene Woche in Hawaii?} and \enquote{Hat die Athletin den Surf\breaklig tes\breaklig t bestanden?} In the second question, it would clearly be wrong to use the ft-ligature; the word \opt{Surftest} is therefore entered as ``|Surf\breaklig test|'' in that question. An even more complicated example is the word \opt{Chefinnenleben}, which contains three morphemes. This word can be deconstructed either as \opt{Chefinnen-leben} (\enquote{lives of female bosses}) \emph{or} as \opt{Chef-innenleben} (\enquote{inner life, or lives, of a boss}); the word's middle particle\textemdash\enquote{innen}\textemdash can function both as a suffix to \enquote{Chef} and as a prefix to~\enquote{Leben}. Only in the second case is it wrong to use the fi-ligature. It turns out that the rules of the \pkg{selnolig} package are set so as \emph{not} to break up the fi-ligature in the shorter words Chefin and Chefinnen, in keeping with the principle that the fi-ligature is permitted for suffixes that start with an~\enquote{i}. In contrast, \pkg{selnolig} will break up the fi-ligature in the longer words Chefinnenleben and Chefinnenräume; in these cases, the working assumption is that \opt{innen} acts as a prefix to the third morpheme (Leben or Räume). If this is \emph{not} what you want, i.e., if you really do mean to refer to lives or spaces of female bosses, be sure to use \Verb+\uselig{fi}+ instructions to preserve the fi-ligatures. Better yet, use explicit hyphens: Chefinnen-Leben and Chefinnen-Räume. And, while you're at it, do consider writing the other forms as Chef-Innenleben and Chef-Innenräume.
Your readers will thank you. Summing up: Some composite words are morphologically ambiguous. For such words, it is (currently) not possible to program software to decide unambiguously whether or not ligatures that might occur in the words should be suppressed. The best advice I can give is to be on the lookout for such words and to take corrective action should \pkg{selnolig}'s choices be wrong. \subsection{How to provide additional ligature suppression patterns} As already noted, it's not possible to claim that the non-ligation rules provided in \pkg{selnolig-english-patterns.sty} and \pkg{selnolig-german-patterns.sty} are complete or, for that matter, will \emph{ever} be complete. If you come across words containing ligatures that ought to be suppressed but aren't caught (yet) by the \pkg{selnolig} package, you could do the following. First, you could insert \cmmd{breaklig} instructions to suppress the ligatures on a case-by-case basis; conversely, if you discover an instance for which \pkg{selnolig} improperly breaks up a ligature, you could override that action by encasing the character pair (or triple) in a \cmmd{uselig} statement. Second, you could create your own \cmmd{nolig} and \cmmd{keeplig} rules to deal with the cases you've discovered on a document-wide basis. A third option, naturally, is to bring this case to the attention of the maintainer of the \pkg{selnolig} package and ask him/her to update the package's ligature suppression rules\dots To give an extended example of how one might go about augmenting the set of ligature-suppression rules already provided by the \pkg{selnolig} package, suppose that you've been tasked with preparing a special edition of Thomas Mann's novel \emph{Der Tod in Venedig}.
Suppose further that you have chosen to use an \enquote{Antiqua} (\enquote{Roman}) font\textemdash which, naturally, features a ligature for the ffl character triple\textemdash to typeset this special edition, because fewer and fewer people nowadays can manage to read with ease text set in a {\blackletter period-appropriate blackletter font}. During your preparations, you happen to notice (i)~that the novel contains the word \opt{inbegriffleitend}\footnote{This word really does occur in the aforementioned novel! This novel may very well also be the \emph{only place} ever where you'll encounter this word. I performed a Google search for the term \enquote{inbegriffleitend}; the only hits, apart from online editions of the novel itself, were the sites of a couple of French bloggers who agonized over how this word might possibly be translated from German to French\dots} and (ii)~that the \pkg{selnolig} package does not (yet) provide a rule that suppresses the ffl-ligature for this word. To meet the need to suppress the ffl-ligature in \opt{inbegriffleitend}, you could insert a \cmmd{breaklig} instruction, i.e., enter it as ``\Verb|inbegriff\breaklig leitend|''. (Doing so isn't inefficient in the present example because the word \opt{inbegriffleitend} occurs exactly once in the novel.) Alternatively, you could devise a new \cmmd{nolig} rule to catch this word. For instance, you could add the following \cmmd{nolig} rule (to be inserted in your document's preamble, after the \pkg{selnolig} package has been loaded) to suppress the ffl ligature in \opt{inbegriffleitend} as well as in the far more common words \opt{Jugendtreffleiter} and \opt{Kunststoffleitung}: \begin{Verbatim} \nolig{ffleit}{ff|leit} \end{Verbatim} With this rule in place, the words will be typeset as inbegriffleitend, Jugendtreffleiter, and Kunststoffleitung, respectively. 
Just in case this discussion has made you curious: the file \pkg{selnolig-german-patterns.sty} provides the more general (i.e., less restrictive) rule \Verb+\nolig{fleit}{f|leit}+ to catch these words. When designing your own \cmmd{nolig} and \cmmd{keeplig} rules, you would ideally do so in a way that ensures that the rules are neither too specific (and hence only apply to a very small set of words) nor too general (and hence end up applying to words for which the rule wasn't meant to apply). Of course, this may be easier said than done. Consider, say, what would happen if you came up with the rule \begin{Verbatim} \nolig{flei}{f|lei} \end{Verbatim} This rule would certainly succeed in breaking up the ffl-ligature in the words inbegriffleitend, Jugendtreffleiter, and Kunststoffleitung. It will also break up the fl-ligature in Laufleistung, Häuflein, Kreislaufleiden, Scherflein, and many more words. However, this rule would also \emph{incorrectly} break up the fl-ligature in many other words, including f\kern0pt leissig\slash f\kern0pt leißig, f\kern0pt leischig, Hackf\kern0pt leisch, and Diplomf\kern0pt leißarbeit; I believe most German readers would prefer to see these words typeset \emph{with} the fl-ligature, i.e., as fleissig\slash fleißig, fleischig,~etc. When in doubt, try to err on the side of making your \cmmd{nolig} rules a bit too restrictive. \subsection[How to use the selnolig package to suppress certain ligatures globally]{How to use the \pkg{selnolig} package to suppress certain ligatures \emph{globally}} \label{sec:global-nolig} The main purpose of the \pkg{selnolig} package is, obviously, to disable certain ligatures selectively. 
However, it can also be used to suppress ligation globally for selected character pairs.\footnote{I first became aware of the potential need for such a feature from reading Frank Mittelbach's posting, \href{http://tex.stackexchange.com/q/61042/5001}{Suppress certain ligatures generally}, on \href{http://tex.stackexchange.com/}{tex.stackexchange.com}.} For instance, suppose that you are typesetting a Turkish text. The Turkish alphabet features both a dotted~i and a dotless~\char"0131\ character. I understand that in Turkish typesetting practice, the fi and ffi ligatures should \emph{never} be employed, so as to avoid creating any doubts as to whether it's an~i or an~\char"0131\ character that follows the~\enquote{f} character. To satisfy the need of Turkish typography for global suppression of the fi and ffi ligatures, one could place the following instruction in the document's preamble: \begin{Verbatim} \nolig{fi}{f|i} \end{Verbatim} %\enlargethispage{0.5\baselineskip} Or, suppose that you have a font that provides ligatures for the {\ebg \uselig{fb}, \uselig{fh}, \uselig{fj},} and {\ebg\uselig{fk}} character pairs (as well as, possibly, the {\ebg \uselig{ffb}, \uselig{ffh}, \uselig{ffj},} and {\ebg\uselig{ffk}} character triples). If you wanted to suppress the four former f-ligatures globally (and also break up the latter ligatures into ff\kern0pt b, ff\kern0pt h, ff\kern0pt j, and~ff\kern0pt k, respectively), you could do so by issuing the following commands: \begin{Verbatim} \nolig{fb}{f|b} \nolig{fh}{f|h} \nolig{fj}{f|j} \nolig{fk}{f|k} \end{Verbatim} In fact, as was already mentioned in \cref{sec:approach}, these commands are activated automatically if the \opt{ngerman} or \opt{english} language options are set. This is done because I was unable to come up with a single word of \emph{German} or \emph{English} origin involving these character combinations that doesn't also entail a morpheme boundary collision. 
Of course, your document may contain words \emph{not} of German or English origin that contain some of these character pairs and do not involve a morpheme boundary crossing. For such words, it is not necessary to suppress the corresponding ligatures. For instance, in the word \opt{Kafka}, one may not wish to suppress the {\ebg\uselig{fk}}-ligature for this specific word. The \pkg{selnolig} package provides \cmmd{keeplig} rules to preserve the {\ebg\uselig{fk}}-ligature in names such as {\ebg Kafka, Safka, Piefke, Potrafke, Sprafke, Shirafkan, Tirafkan}, and {\ebg Rifkin}.\footnote{Aside: It was Felix Lehmann's desire to preserve the {\ebg\uselig{fk}} ligature in the name \enquote{{\ebg Kafka}} that stimulated the creation of the package's \cmmd{keeplig} macro. Of course, once the \cmmd{keeplig} macro was created, I quickly discovered all kinds of further uses for it.} Your documents may also feature words of \emph{Nordic} origin that contain the \opt{fj} character pair, such as \opt{Sognefjord} and \opt{Dovrefjell}. Because the \opt{fj} character pair in these words does not span a morpheme boundary, the {\ebg\uselig{fj}}-ligature should not be broken up; i.e., the words should be typeset as {\ebg Sognefjord} and {\ebg Dovrefjell}, respectively. The package therefore provides \cmmd{keeplig} rules to take care of (a)~words that contain the particles {\ebg fjord, fjör, fjell}, and {\ebg fjäll} and (b)~names such as {\ebg Eefje, Sufjan, Prokofjew}, and {\ebg Astafjew}. 
A \enquote{rare} typographic ligature that may warrant global suppression, at least for English language documents, is~\uselig{\emph{ij}}.\footnote{For many fonts I'm familiar with, including the one used for this user guide, the |ij| character pair is available in the upright font shape as a \emph{digraph} rather than as a true, i.e., joined-up, ligature.} To the best of my knowledge, a morpheme boundary crossing occurs for all English language words that contain the~\opt{ij} character pair: \emph{antijam}, \emph{bijection}, \emph{demijohns}, and \emph{hijack}. By the logic set forth above, this ligature should therefore be suppressed for all of these words.\footnote{If this ligature weren't suppressed, the preceding words would be typeset as \emph{\uselig{antijam}, \uselig{bijection}, \uselig{demijohns}, \emph{and} \uselig{hijack}}.} The \uselig{\emph{ij}} ligature also seems inappropriate for most words that came into English from languages such as French, Japanese, and Spanish, including \emph{bijou} (jewel), \emph{gaijin} (foreigner), \emph{jipijapa} (Panama hat), and \emph{marijuana} (Maryjane). (What? Were you maybe thinking of a different meaning of this word? How come?!) The \uselig{\emph{ij}} ligature is therefore suppressed \emph{globally} by the \pkg{selnolig} package if the options \opt{english} and \opt{hdlig} are set. However, this ligature \emph{does} get used a lot in Dutch. Thus, the \pkg{selnolig} package issues various \cmmd{keeplig} directives so that this ligature isn't suppressed for some names and words of Dutch origin, such as \emph{de~Bruijn} and \emph{rijsttafel}.\footnote{The word \emph{rijsttafel}, incidentally, features three consecutive \enquote{rare} ligatures. Another word that contains three rare ligatures, though not consecutive ones, is \emph{\uselig{is}thmus}. 
} \subsection{What if one ligature pre-empts a trailing, more appropriate ligature?} \label{sec:preempt} If a font provides many discretionary ligatures, the likelihood is high that words will occur for which the use of a ligature for the first two characters of a character \emph{triple} might incorrectly pre-empt the use of a more appropriate ligature for the last two characters of that triple. Incidentally, the issue of ligature pre-emption is not limited to \enquote{discretionary} ligatures; it can also arise when dealing solely with \enquote{common} f-ligatures. Suppose, for instance, that a certain font provides~ff, fi, and fl ligatures but no ffi and~ffl ligatures. Consider how \TeX\ would typeset words containing \opt{ffi} and \opt{ffl} character triples. Left to its own devices, \TeX\ would let the leading ff-ligature pre-empt the trailing fi- and fl-ligatures, resulting in typographically incorrect outcomes for words such as \uselig{wolff}ish (better: wolffish), \uselig{saff}lower (safflower), \uselig{auff}inden (auffinden), and \uselig{Schaff}leisch (Schaffleisch). In this section, we examine the use of \cmmd{nolig} rules to address the ligature pre-emption issue, focusing on cases of~\emph{st}, \emph{sp}, \emph{th}, and~\emph{ta} character pairs being preceded by character pairs (for which the font provides ligatures) that end in~\emph{s} or~\emph{t}, respectively. This focus is dictated largely by the discretionary ligatures provided by the main text font used for this user guide (Garamond Premier Pro). 
Other ligature-rich fonts may provide further possibilities for one ligature inappropriately pre-empting that for a trailing character pair.% \footnote{For the font Garamond Premier Pro, I've discovered the following, rather peculiar exception to the general rule that \TeX\ always gives precedence to a ligature for the first two characters of a character triple: For the character triple \opt{fis} (as in \opt{fist} and \opt{fish}), \LuaTeX\ gives preference to the trailing \emph{is} ligature over the leading\, \emph{fi} ligature, causing these words to be typeset as \emph{\uselig{fish}} and \emph{\uselig{fist}}, respectively. I can't tell if this is a conscious design feature or a bug in this font. The \pkg{selnolig} package overrides this behavior, i.e., it is set to give preference to the leading\, \emph{fi} ligature over the trailing \emph{is} ligature for words that contain the strings \opt{fist} and \opt{fish}; hence, they'll be typeset as \emph{fish} and \emph{fist}, respectively. Note that if the \opt{broadf} option is set, as is the case for the document you're currently reading, a side effect of this setting is that words such as \emph{deafish, dwarfish, elfish, oafish, selfish, unselfish, wolfish, draffish, giraffish, gruffish, offish, raffish, sniffish, standoffish, stiffish, \emph{and} toffish}, as well as the associated adverbs ending in \emph{-ly}, will \emph{not} feature an \emph{is} ligature. This loss is, hopefully, not too serious.} \subsubsection*{Ligatures for \emph{as}, \emph{\ebg es}, \emph{is}, and \emph{us} that pre-empt an \emph{st} ligature} Suppose that the text font in use provides ligatures for the \emph{as}, \emph{is}, and \emph{us} character pairs as well as for the \emph{st} character pair. 
By \TeX's rules for forming typographic ligatures, words that contain the character \emph{triples} \opt{ast}, \opt{ist}, or \opt{ust} will see the first two characters ligated, pre-empting the use of a typographic ligature for the trailing \emph{st} character pair. There are at least three distinct reasons why this outcome is not desirable. First, given the rather distinctive look of the \emph{st}~ligature, the word \opt{stochastic} may look a bit odd if the \emph{st} ligature is used only once\textemdash\emph{\uselig{stochastic}}\textemdash simply because the \emph{as} ligature is allowed to pre-empt the second \emph{st} ligature; readers may prefer the look of \emph{stochastic}. Second, non-use of the st/\emph{st} ligature may be undesirable if the same word occurs twice and in close visual proximity, once set in the upright font shape\textemdash for which there are no ligatures for the \opt{as}, \opt{is}, and \opt{us} character pairs and hence for which the issue of pre-emption of the st doesn't arise\textemdash and once in italics: must and \emph{\uselig{must}}; readers may prefer the look of must and \emph{must}. Or, consider the words \enquote{historian} and \enquote{history} when typesetting them in italics: I'd say it's much better to typeset them as \emph{historian} and \emph{history} rather than as \emph{\uselig{historian}} and \emph{\uselig{history}}. Similarly, I suspect that Austrians and Australians\textemdash at least the ones who care about discretionary typographic ligatures\textemdash may prefer to see the name of their country typeset as \emph{Austria} and \emph{Australia} rather than as \emph{\uselig{Austria}} and \emph{\uselig{Australia}}. Finally: Do you prefer the look of \emph{Do \uselig{fast} festive \uselig{fists} foster \uselig{fustiness}?} or that of \emph{Do fast festive fists foster fustiness?} You prefer the latter look too? Great! 
Third, there may be cases where an \emph{as} ligature both pre-empts a subsequent \emph{st} ligature \emph{and} spans a morpheme boundary, as in the words \emph{\uselig{astride}}, \emph{\uselig{infrastructure}}, and \emph{\uselig{seastrand}}. The readability of such words definitely increases if the \emph{as} ligature is suppressed: \emph{astride}, \emph{infrastructure}, and \emph{seastrand}. The \pkg{selnolig} package is set to give preference to the distinctive-looking \emph{st} ligature over \emph{as}, \emph{is}, and \emph{us} ligatures. To this end, the following \cmmd{nolig} rules are provided:\footnote{The second of these three commands, while correct for most words that contain the string \opt{ist}, unnecessarily suppresses the \emph{is} ligature for words for which the \emph{st} character pair crosses a morpheme boundary, e.g., words that start with \emph{dis-t\ldots}\textemdash e.g., \emph{distend, distribute, distrust, disturb}\textemdash or with \emph{mis-t\ldots}\textemdash e.g., \emph{mistake, mistranslate, mistype}. (Note that separate \cmmd{nolig} rules already suppress the st/\emph{st} ligature for these words.) At this time there are no plans to address this (overall hopefully minor?) issue.} \begin{Verbatim} \nolig{ast}{a|st} \nolig{ist}{i|st} \nolig{ust}{u|st} \end{Verbatim} \subsubsection*{Ligatures for \emph{as}, \emph{\ebg es}, \emph{is}, and \emph{us} that pre-empt an \emph{sp} ligature} The same three reasons for not letting \emph{as}, \emph{\ebg es}, \emph{is}, and \emph{us} ligatures pre-empt an \emph{st} ligature also apply to the case of the equally distinctive-looking \emph{sp} ligature. The \pkg{selnolig} package therefore provides rules to ensure that the \emph{sp} ligature is used in words such as \emph{clasp}, \emph{hasp}, \emph{raspberry}, \emph{teaspoon}, \emph{wasp}, \emph{\ebg espionage}, \emph{hispanic}, \emph{crisp}, \emph{lisp}, \emph{whisper}, and \emph{cusp}. 
\subsubsection*{Ligatures for \emph{at} and \emph{et} that pre-empt a \emph{th} ligature} Suppose that a font provides ligatures for the \emph{at}, \emph{et}, and \emph{th} character pairs. By \TeX's rules for forming ligatures, without special intervention the word \opt{mathematics} will be typeset as \emph{\uselig{mathematics}} rather than as \emph{mathematics} because the first \emph{at} ligature pre-empts the \emph{th} ligature. The same happens for words such as \emph{\uselig{bath}}, \emph{\uselig{Kathryn}}, and \emph{\uselig{pathology}}. Given the commonness and the distinctive pronunciation of the \opt{th} character pair in the English language, as well as the high frequency of this character pair in words of Greek origin (for which the Latin-alphabet \opt{th} character pair derives from the single Greek character $\theta$/$\vartheta$), it seems undesirable to let the \emph{at}-ligature pre-empt the \emph{th} ligature for these words. Fixing the \emph{at}--\emph{th} ligature pre-emption issue globally\textemdash e.g., via \Verb+\nolig{ath}{a|th}+\textemdash is not completely innocuous, though, because doing so will also suppress the \emph{at} ligature for words such as \opt{boathook}, for which the \emph{th} ligature would span a morpheme boundary and thus shouldn't be employed anyway. For such words, then, there's no need to suppress the \emph{at} ligature. These cases are dealt with by providing specific \cmmd{keeplig} rules to re-enable the use of the \emph{at} ligature. Suppressing an \emph{et} ligature in favor of a subsequent \emph{th} ligature via \Verb+\nolig{eth}{e|th}+ is almost universally correct, either because the \emph{th} ligature \emph{should} take precedence\textemdash as in the words \emph{ethics}, \emph{methane}, and \emph{teeth}\textemdash or because the \emph{et} ligature would cross a morpheme boundary and hence shouldn't be used anyway, as in the words \emph{forethought} and \emph{rethink}.
I say that it's \emph{almost} universally correct to do so; however, there are some words, such as \opt{Beethoven}, \opt{prophethood}, and \opt{sweetheart}, for which the \emph{th} ligature would be inappropriate anyway and for which the use of the \emph{et} ligature would hence be unproblematic. To address this issue, \cmmd{keeplig} rules are provided to override the rule \Verb+\nolig{eth}{e|th}+ for these words, so that they'll get typeset as \emph{Beethoven}, \emph{prophethood}, and \emph{sweetheart}.\footnote{Note that this method works if the font being used provides both \emph{et} and \emph{th} ligatures. If the text font you employ provides only a \emph{th} ligature but no \emph{et} ligature, these \cmmd{keeplig} rules should be disabled.} \subsubsection*{Ligatures for \emph{at} and \emph{et} that pre-empt a \emph{ta} ligature} There seem to be very few English words for which an \emph{at} ligature might inappropriately pre-empt a \emph{ta} ligature. One such word is \opt{atap}; its readability is likely enhanced if it's typeset as~\emph{atap} rather than as~\emph{\uselig{atap}}. Virtually all words for which an \emph{et} ligature might inappropriately pre-empt the use of a trailing \emph{ta} ligature appear to be words for which the \emph{et} ligature crosses a morpheme boundary and should be suppressed. Examples are \emph{betake, betatter, bristletail, caretaker, detach, dovetail, foretaste, pretax, retable, retack, retard, retarget, timetable, \emph{and} wiretap}. The \emph{et}-related ligature suppression rules already put in place to deal with morpheme boundary crossing cases should therefore suffice to catch these cases as well. Do be on the lookout for cases of persons' names which include the string |eta| and for which the trailing \emph{ta} ligature should not be pre-empted by the leading \emph{et} ligature. 
For instance, the author of this package prefers to see his surname typeset as \emph{Loretan} rather than as \emph{\uselig{Loretan}}\dots \section{Further issues} \label{sec:further} \subsection{Known bugs} I'm not sure if the following constitutes a bug or \enquote{merely} a case of incompatibility between \LaTeX\ packages. The \pkg{selnolig} package does not interact well with the \LaTeX\ packages \pkg{ngerman} and \pkg{german}. Fortunately, the \pkg{selnolig} package \emph{does} interact nicely with the \pkg{babel} package if one of the options \opt{ngerman}, \opt{german}, \opt{austrian}, \opt{naustrian}, \opt{swissgerman}, or \opt{swiss} is set. Unless someone can convince me that using the \pkg{ngerman} or \pkg{german} packages is preferable to using the \pkg{babel} package, I probably won't bother addressing this incompatibility. \subsection{Supplementary hyphenation exception patterns} \label{sec:addlhyph} \TeX's hyphenation algorithms are widely acknowledged to be very good. However, for the English language case at least, it tends to miss quite a few permissible hyphenation points when dealing with words that end in |-f-ing|, |-f-ier|, |-f-iest|, |-f-less|, |-f-like|, etc. Hyphenation exception lists are provided in the files \pkg{selnolig-english-hyphex.sty} and \pkg{selnolig-german-hyphex.sty}, respectively, for English and German words. The German-language hyphenation exception list is currently still the shorter of the two. 
The German word list is shorter in part because it is assumed that writers of German-language documents use the \pkg{babel} package and select the option \opt{ngerman} (or one of the synonymous language options); doing so also loads specialized hyphenation patterns suitable for German texts.\footnote{As was already noted earlier, the \pkg{selnolig} package is also compatible with the \pkg{hyphsubst} and \pkg{polyglossia} packages.} It is possible to instruct \pkg{selnolig} \emph{not} to load the package's hyphenation exception lists. You may want to do so, for instance, if you must use UK-English hyphenation patterns and therefore shouldn't use the US English-based hyphenation patterns provided by the package. (To the best of my knowledge, most of the hyphenation patterns employed in \pkg{selnolig-english-hyphex.sty} are common to UK and US English.) To skip loading the additional hyphenation patterns when invoking the \pkg{selnolig} package, you should specify the option \opt{noadditional\-hyphen\-a\-tion\-patterns}. (I am obviously not trying to make it too easy to invoke this option~\dots) As was already noted in \cref{sec:anythingelse}, if you use the \pkg{babel} package with, say, the \opt{ngerman} option, be sure to load the \pkg{selnolig} package \emph{after} the \pkg{babel} package. That way, the \pkg{selnolig} package's additional hyphenation exception patterns won't be overridden by \pkg{babel}'s settings. Incidentally, if the files \pkg{selnolig-english-hyphex.sty} and \pkg{selnolig-german-hyphex.sty} are located in a directory that's in the search path of your TeX distribution, these packages may be loaded via the usual \cmmd{usepackage} statements without having to load the entire \pkg{selnolig} package.
\subsection[How to track what the selnolig package is doing]{How to track what the \pkg{selnolig} package is doing} \label{sec:debugon} If you execute the command \cmmd{debugon}, detailed information about each \cmmd{nolig} and \cmmd{keeplig} pattern match that is encountered is written to the \opt{.log} file. Note that the default setting is \cmmd{debugoff}, i.e., the package's activity is not recorded by default to the \opt{.log} file. Because of the redundancy built into some of the package's \cmmd{nolig} commands, it is possible that more than one pattern match will occur for a given word. E.g., for the verb \enquote{auffahren}, two separate \cmmd{nolig} commands apply simultaneously, \emph{viz.}, \Verb+\nolig{auff[aeiloruyäöü]}{auf|f}+ and \Verb+\nolig{ffahr}{f|fahr}+. For this word, then, the following information is written to the \opt{.log} file: \begin{Verbatim} pattern match: auffahren - auff[aeiloruyäöü] pattern match: auffahren - ffahr Do ligature suppression for: auffahren Inserting nolig whatsit before glyph: f Last char: n \end{Verbatim} Some words contain more than one potential ligature suppression point. 
For example, if the word \enquote{Auflaufform}\textemdash which happens to have both an fl- and an ff-ligature that should be suppressed\textemdash is encountered, the following lines are written to the \opt{.log} file; note that in this case, two separate \cmmd{nolig} commands \enquote{catch} the fl and ff ligatures: \begin{Verbatim} pattern match: Auflaufform - flauf pattern match: Auflaufform - Aufl[aeiouyäöü] pattern match: Auflaufform - auff[aeiloruyäöü] pattern match: Auflaufform - fform Do ligature suppression for: Auflaufform Inserting nolig whatsit before glyph: l Inserting nolig whatsit before glyph: f Last char: m \end{Verbatim} If \emph{both} a \cmmd{nolig} and a \cmmd{keeplig} command apply to a word\textemdash as is the case for \opt{fjord}, to which both \Verb+\nolig{fj}{f|j}+ and \Verb+\keeplig{fjord}+ apply\textemdash the following (mercifully short) bit of information is written to the \opt{.log} file: \begin{Verbatim} pattern match nolig and keeplig: fjord - fj - fjord \end{Verbatim} If more than one \cmmd{nolig} rule \emph{as well as} a \cmmd{keeplig} rule apply to a given word, as is the case for the word \enquote{Streiflicht}, the following information is written to the \opt{.log} file: \begin{Verbatim} pattern match nolig and keeplig: Streiflicht - flich - flicht pattern match: Streiflicht - reifl Do ligature suppression for: Streiflicht Inserting nolig whatsit before glyph: l Last char: t \end{Verbatim} Observe that the first nolig rule's pattern, |flich|, is overridden by the keeplig rule's pattern |flicht|. The second nolig rule's pattern, |reifl|, is \emph{not} overridden, and it is the debugging-related information associated with the second \cmmd{nolig} rule that ends up being written to the |.log| file. To terminate or suspend the writing of the debugging-related information to the \opt{.log} file, execute the command \cmmd{debugoff}. To restart the logging of this information, issue the command \cmmd{debugon}.
\subsection[Suspending and restarting the operation of selnolig's macros]{Suspending and restarting the operation of \pkg{selnolig}'s macros} \label{sec:selnoligon} By default, \pkg{selnolig}'s macros are switched on (assuming, of course, that your document is compiled under \LuaLaTeX). If you want to suspend their operation, you should issue the command \begin{Verbatim} \selnoligoff \end{Verbatim} at the desired point in your document. For instance, in this document the command \cmmd{selnoligoff} is issued at the start of the appendices. This is done because most of the material in the appendices (which contain mostly listings of lua and \LaTeX\ code) is typeset using a monospaced font for which all typographic ligatures have been disabled up front. Hence, there's no need to disable any ligatures selectively in that part of the document, is there? Conversely, if \pkg{selnolig}'s macros need to be switched back on, just issue the command \begin{Verbatim} \selnoligon \end{Verbatim} In this document, this is done at the start of \cref{sec:budd}, which provides a long list of words found in \emph{Die Buddenbrooks} for which f-ligatures should be suppressed. \subsection{Lists of words that fit German and English non-ligation patterns} \label{sec:lists} Extensive lists of German and English language words for which one or more ligatures should be suppressed are provided in the supplemental files \pkg{selnolig-german-test.pdf} and \pkg{selnolig-english-test.pdf}. As was already mentioned in \cref{sec:components}, these documents may be viewed by opening a command window and typing \pkg{texdoc selnolig-german-test} and \pkg{texdoc selnolig-english-test}. The list of words in \pkg{selnolig-german-test.pdf} consisted initially almost entirely of the words provided by the \pkg{rmligs} script. However, I've added quite a few more words to that list in the meantime.
Thanks to the great work of Felix Lehmann and Steffen Hildebrandt explained in more detail in \cref{sec:thanks}, the package's German language ligature-suppression rules now apply to a much larger set of words than what's listed in \pkg{selnolig-german-test.pdf}. Naturally, suggestions for still more words to be de-ligated are always welcome. %The files \pkg{selnolig-german-test.tex} and \pkg{selnolig-english-test.tex} are \enquote{driver programs} that load the \pkg{selnolig} package and then run it on the respective lists of German- and English-language words. To compile the driver programs, be sure to use \LuaLaTeX. \subsection{Making suggestions and reporting bugs} Feedback on the \pkg{selnolig} package is always welcome, whether for making general suggestions, heaping praise on the package's author, or for reporting bugs and other problems. \href{mailto:loretan.mico@gmail.com}{Emailing me} about \pkg{selnolig} will definitely get my attention. I'm also a fairly active participant (handle: \enquote{Mico}) on \href{http://tex.stackexchange.com/}{tex.stackexchange.com}, and I occasionally check what goes on in \href{http://texwelt.de/wissen/}{TeXwelt.de}, \href{https://groups.google.com/forum/?fromgroups#!forum/comp.text.tex}{comp.text.tex} and \href{https://groups.google.com/forum/#!forum/de.comp.text.tex}{de.comp.text.tex}. In case I appear to miss any \pkg{selnolig}-related discussions on these sites, please don't hesitate to contact me to alert me to them. When reporting what you believe to be shortcomings and/or mistakes with the package's ligature suppression rules, please use a structure similar to the template given in \cref{sec:template} of this document.
If anyone is thinking of creating rules to suppress ligatures selectively for languages other than English and German and would like to make use of the \enquote{machinery} provided by the \pkg{selnolig} package, I'd be delighted to provide reasonable amounts of programming-related assistance.\footnote{A promising candidate (in my view at least) for getting the \pkg{selnolig} treatment would be {\blackletter blackletter} or {\blackletter Fraktur} fonts and their special ligatures. When used to write German documents, it is well known that these ligatures need to be suppressed for certain words, i.e., selectively. E.g., to suppress the {\blackletter tz} (|tz|) ligature for the words {\blackletter Brotzeit}, {\blackletter Nachtzeit}, and {\blackletter Tatzeit}, one could set up the rule \Verb+\nolig{tzeit}{t|zeit}+. If somebody wants to take on this project, I'd be happy to help you get started.} The latest versions of all files that comprise the \pkg{selnolig} package may be found at the \href{https://github.com/micoloretan/selnolig}{package's GitHub site}. What's there may be slightly more up to date than what I manage to push to the \href{http://ctan.org/tex-archive/macros/luatex/latex/selnolig}{CTAN page}. \section{License and acknowledgments} \label{sec:thanks} The entire \pkg{selnolig} package is placed under the terms of the \LaTeX\ Project Public License, version~1.3 or later (\url{http://www.latex-project.org/lppl.txt}). It has the status \enquote{maintained}. 
I owe a huge intellectual and programming debt to Patrick Gundlach and Taco Hoekwater, who responded kindly and generously with detailed computer code to various queries I posted to \href{http://tex.stackexchange.com}{\texttt{tex.stackexchange.com}}.\footnote{See especially the questions \href{http://tex.stackexchange.com/q/37443/5001}{Any suggestions/requests for features for a new package that allows disabling ligatures for (pre)selected words?}, \href{http://tex.stackexchange.com/q/48516/5001}{How to suppress the operation of a luatex-defined macro on a string if the string is part of macro or a label}, and \href{http://tex.stackexchange.com/q/63005/5001}{Ligature suppression algorithm fails if the word in question contains an Umlaut (dieresis) before the ligature}.} Without their help and support, this package would not exist. They certainly deserve most of the credit for the lua code used by the \pkg{selnolig} package. Felix Lehmann (a linguist and expert in morphology, i.e., the study of morphemes) and Steffen Hildebrandt (computer scientist extraordinaire) served as patient and careful testers of several early beta versions of this package, uncovering and fixing bugs, pointing out unclear passages in the user guide, writing scripts to automate the discovery of redundancies and syntax errors in the package's \cmmd{nolig} and \cmmd{keeplig} rules, and providing many excellent suggestions for improving the package. Steffen also provided crucial modifications to the package's lua code to make possible the \cmmd{keeplig} and \cmmd{uselig} macros. Equally importantly, Felix and Steffen created scripts to test systematically and comprehensively the package's German \cmmd{nolig} and \cmmd{keeplig} rules for linguistic adequacy and (relative) completeness. They began with a corpus of almost 850 million [!!] German words, which they marked up with information on the morphological constituents of each word. 
From this huge word list, they extracted a set of 462,000 unique word forms containing potential f-ligatures. According to their analysis, an early version (October 2012) of the \pkg{selnolig} package already dealt correctly with 85 percent (394,000) of those word forms, while still generating 21,000 Type~I errors and 48,000 Type~II errors!\footnote{In the context of the \pkg{selnolig} package, a Type~I error is the failure to suppress a typographically inappropriate ligature, and a Type~II error is the improper suppression of a typographically appropriate ligature.}\textsuperscript{,}\footnote{1,000 words contained more than one potential f-ligature, hence the difference.} Fortunately for me, they quickly discovered that a non-negligible part of the 69,000 errors wasn't real but, rather, the result of spelling errors in the words included in the corpus and/or of suspect morphological analysis. Moreover, the detailed Type~I and Type~II error lists they generated were critical in helping me refine\textemdash and occasionally revise completely\textemdash the package's \cmmd{nolig} and \cmmd{keeplig} rules, rapidly bringing down the number of Type~I and~II errors. For instance, after Steffen wrote the lua code that made possible the \cmmd{keeplig} macro, the single rule \Verb+\keeplig{flicht}+ eliminated, in one fell swoop, more than 10,000~[really!] Type~II errors generated by the package's initial suppression of the fl-ligature in words that contain the morpheme \opt{pflicht}. All major changes to the German language ligature suppression patterns are still being subjected to their testing algorithms, streamlining the tasks of detecting what's left to improve and catching any newly introduced errors. 
Felix and Steffen started the automated testing of the package's |\nolig| and |\keeplig| rules as their finals project for the course \emph{Introduction to Computational Linguistics} at the University of Massachusetts at Amherst which they attended in the fall of~2012.% \footnote{Both their \href{https://github.com/SHildebrandt/selnolig-check}{code} and their \href{https://github.com/SHildebrandt/selnolig-check/blob/master/selnolig-check-documentation.pdf?raw=true}{finals paper} are available online.} They wish to thank the Institut für Maschinelle Sprachverarbeitung at the Universität Stuttgart for granting them a license for the morphological analysis tool SMOR\footnote{Schmid, H., A. Fitschen, and U. Heid, 2004, \enquote{SMOR: A German Computational Morphology Covering Derivation, Composition, and Inflection}, \emph{Proceedings of the IVth International Conference on Language Resources and Evaluation (LREC)}, pp.~1263--1266, Lisbon, Portugal.} and, in particular, Helmut Schmid for his guidance. They also express their gratitude to the \emph{Web-as-Corpus kool ynitiative} (\emph{WaCky}) for letting them use the SDeWaC corpus,\footnote{Baroni, M., S.~Bernardini, A.~Ferraresi, and E.~Zanchetta, 2009, \enquote{The WaCky Wide Web: A Collection of Very Large Linguistically Processed Web-Crawled Corpora}, \emph{Language Resources and Evaluation}, 43~(3), pp. 209--226.} as well as to Rajesh Bhatt (University of Massachusetts at Amherst), Miriam Butt (Universität Konstanz), and Sabine Schulte im~Walde (Universität Stuttgart) for helping them find the right resources for their project. The \href{http://www.ctan.org/tex-archive/support/rmligs}{\pkg{rmligs}} perl script by Björn Jacke lists about 1,700 German language words for which various f-ligatures should be suppressed. 
The initial set of German-language \cmmd{nolig} rules I wrote were designed to capture the words listed by \pkg{rmligs}.\footnote{All versions of the \pkg{rmligs} package are archived at \url{http://www.j3e.de/ispell/igerman98/dict/}.} Matthias Vogel very kindly shared with me a set of regular-expression based ligature suppressing macros, named \href{http://www.winedt.org/Macros/LaTeX/Ligatures-German.php}{Ligatures-German}, he wrote for the WinEdt programmer's editor. Matthias's macros work by inserting \pkg{babel}-style~\Verb+"|+ ligature-suppressing directives in the appropriate places in the |.tex| file, which can then be compiled with pdf\LaTeX. These macros, and a file he sent me containing an extensive list of German words for which one or more f-ligatures should be suppressed, led me to thoroughly refine and extend the scope of the \pkg{selnolig} package's German language ligature suppressing rules. Barbara Beeton provided very detailed comments on an early version of the package's user guide and its English language ligature suppression rules. She also noted that the readability of my surname might be enhanced if it were typeset as \emph{Loretan} instead of as \emph{\uselig{Loretan}}\dots\ David Bellows contacted me about the package and provided a suggestion that led me to extend some of the \cmmd{nolig} rules in Part~2 (activated by setting the option |broadf|) of the file \pkg{selnolig-english-patterns.sty}. Other contributors who posted answers and comments to various ligature-related questions I've asked occasionally on \href{http://tex.stackexchange.com/}{tex.stackexchange.com} and \href{https://groups.google.com/forum/#!forum/comp.text.tex}{comp.text.tex}, too numerous to name individually, also helped influence the genesis of this package. To all of you, I express my sincere thanks. 
%The website \url{http://www.morewords.com} provides very convenient methods for searching English language words that may contain cases of ligature collisions across morpheme boundaries. For German words, the site \url{http://corpora.informatik.uni-leipzig.de/?dict=de} provides a similar resource. \clearpage \appendix \selnoligoff % turn off selnolig's macros \clubpenalty100 \widowpenalty100 \small % reduce text font size from 11pt to 10pt \section[The package's English-language ligature suppression rules]{English-language ligature suppression rules: \pkg{selnolig-english-patterns.sty}} \label{sec:eng-listing} \begin{multicols}{2} \VerbatimInput{selnolig-english-patterns.sty} \end{multicols} \clearpage \section[The package's German-language ligature suppression rules]{German-language ligature suppression rules: \pkg{selnolig-german-patterns.sty}} \label{sec:germ-listing} Introductory note: To accommodate the practice of Swiss-German writers of not using the \enquote{ß} character and using \enquote{ss} in its place, all search-and-insert strings that contain an \enquote{ß} character are duplicated with equivalent search-and-insert strings containing~\enquote{ss}. 
\bigskip \begin{multicols}{2} \VerbatimInput{selnolig-german-patterns.sty} \end{multicols} \clearpage \section[The package's main style file: selnolig.sty] {The package's main style file: \pkg{selnolig.sty}} \label{sec:sty} \VerbatimInput{selnolig.sty} \clearpage \section[The package's lua code: selnolig.lua]{The package's lua code: \pkg{selnolig.lua}} \label{sec:luacode} \VerbatimInput{selnolig.lua} \clearpage \section[Reporting bugs and other issues with the selnolig package: A suggested template]{Reporting bugs and other issues with the \pkg{selnolig} package:\\A suggested template} \label{sec:template} \VerbatimInput{selnolig-bugreport.tex} \clearpage \selnoligon \section{A case study: f-ligatures suppressed in \emph{Die Buddenbrooks} and in \emph{Faust I \& II}} \label{sec:budd} \ebg As I noted in the introduction, in English language documents the suppression of ligatures that cross morpheme boundaries is generally not considered to be an urgent typographical priority, possibly because words with ligatures that cross morpheme boundaries aren't all that common. I performed a quick check of this claim by compiling Jack London's novels \emph{Call of the Wild} and \emph{The Sea Wolf}, obtained from Project Gutenberg's site in plain-text format, with \pkg{selnolig}. Sure enough, in \emph{Call of the Wild} only one f-ligature is suppressed: the \enquote{fl} ligature in \uselig{wolflike}\slash wolflike. In \emph{The Sea Wolf}, again only one f-ligature is suppressed: the \enquote{ft} ligature in \uselig{fifth}\slash fifth (3 instances). Not exactly a lot of words, right?! The situation is quite different for German texts. E.g., in Thomas Mann's novel \emph{Die Buddenbrooks}, more than 430 words are caught by \pkg{selnolig}'s rules; they are listed below. (Incidentally, the fi-ligature in the words Kaufinger and Kaufingerstraße is suppressed because \enquote{\uselig{Kaufinger}} (\enquote{chew-finger}) looks unintentionally hilarious.) 
\enlargethispage{1.5\baselineskip} \smallskip {\RaggedRight \noindent \begin{tabularx}{\textwidth}{@{}lX@{}} \midrule f-lig.\ & Words for which the f-lig.\ is suppressed by \pkg{selnolig}'s rules (433 total) \\ \midrule \uselig{fb}, \uselig{ffb} & Aufbahrung, aufbegehrt, aufbegehrte, Aufbewahrungsort, Aufbietung, aufblicken, aufblitzenden, aufblitzt, aufblitzten, aufblühende, aufbrach, aufbrechen, aufbringen, aufbringt, Aufblick, Aufbrausen~(2), Aufbruch~(4), Briefbeschwerer, Dampfbrot, daraufbringen, greifbaren, Greifbares, heraufbefördert, hinaufblickte, Hofbräu, Hofbräuhaus~(3), Hofbräuhause, Kopfbewegung~(3), Kopfbewegungen, Krippenaufbaus, Laufbahn, Privatbriefbogen, Scharfblick, Schiffbrüchen, Schlafbedürfnis, steifbeinigen, Stiefbruder~(4), strafbar~(2), tiefbelustigt, tiefblauen~(2), unangreifbar\\ %% ff & Auffahren, Auffahrt, auffallend~(5), auffassen, Auffassung, auffordern~(2), aufforderte~(4), Aufforderung~(2), Aufforderungen, Aufführung, auffällig~(2), auffällige~(2), auffälliger~(2), auffällt, aufführen, aufführte, fünffacher~(2), herauffuhr, heraufführte, Kauffahrteischiffen (Type~II error\slash arch. 
spelling?), Liebhaberaufführung, Schlaffrisur, unauffällig, unauffällige\\ %% ffi & Auffinden\\ %% ffl & ffl $\to$ f\kern0ptfl: Aufflackern, aufflog; ffl $\to$ ff\kern0ptl: unübertrefflich~(3), unübertrefflichen, vortrefflich~(11), vor\-treff\-li\-che~(2), vortrefflichen, vortrefflicher, vortreffliches, vortrefflichsten\\ %% {\ebg \uselig{fh}, \uselig{ffh}} & aufheben~(3), Aufhebens, aufhebt, Aufhebung, aufheitert, aufhellte, aufhielt~(3), Aufhorchend, aufhorchenden, aufhält~(2), aufhören~(14), aufhörte~(6), daraufhin~(2), Fünfhausen~(2), fünfhundert~(5), Hundertsiebenundzwanzigtausendfünfhundert~[!!], Kaufherren~(2), krampfhaft~(6), krampfhafte~(2), krampfhafte, krampfhaften~(4), Schlaffheit, Steifheit~(2), unaufhaltsam~(5), unaufhaltsame, unaufhaltsamen, unaufhaltsamer, unaufhörlich~(14), unaufhörliche, unaufhörlichen\\ %% fi & Kaufinger, Kaufingerstraße, tiefinnere\\ %% {\ebg \uselig{fj}} & Aufjauchzen, elfjährig, fünfjährigen, zwölfjährig~(2)\\ {\ebg \uselig{fk}, \uselig{ffk}} & aufkam, aufklingen, aufklärte, Aufklärung, aufkommen, Briefkas\breaklig{}ten, Haffkrug, heraufkamen, heraufkommen~(3), Heraufkommenden, Kehlkopfknoten, Kopfkissen~(2), Napfkuchens, Stopfkugel~(2)\\ fl & auflehnte, aufleuchtete~(2), auflösen, auflöste, Auflösung~(11), begreiflich, Begreiflicherweise, be\-hilf\-lich~(6), beruflichen~(3), glimpflich~(2), gräflichen, Großkaufleuten, Handgreifliches, hilflos~(7), hilfloses, Hilf\-losigkeit~(2), Hilflosigkeitsgefühl, hinaufläuft, höflich~(7), höfliche~(2), höflichen, Höflichkeit~(6), Höflichkeitsform, höflichst, kampflose, kampflosen, Kaufleute~(9), Kaufleute-Witwenkasse, Kauf\-leuten~(3), Knopfloch, käuflich~(2), reiflich, schimpfliche, schimpflichen, schlaflos, schlaflose, schlaflosen, Schlaflosigkeit, steiflehnige, teuflischem, teuflischer, teuflisches, tiefliegende, tiefliegenden~(20), unbegreif\-lich~(3), unbegreifliche, unbegreiflichen~(3), unbegreiflicher, Unbegreiflicherweise, unerschöpflich, unhöflichen, Verzweiflung~(23), 
verzweiflungsvoll~(2), Zöpflein, zweifle~(2)\\ ft & auftat~(2), auftauchen, auftauchte, auftauen, auftreten, Auftreten~(3), Auftritt~(3), Auftritte~(2), Auftritten, Auftrittes, auftrittst, Aufträge, Briefträger, elftausend, Elfter, Elftes~(3), fünftausend~(2), fünften~(3), fünfter, Fünfter, Fünftes~(9), Hoftheater, Hoftheaterbesuchen, Hoftore (2), Hoftür, schlaftrunkenen, schlaftrunkener, Schnupftuch~(3), Schnupftuche, Schnupftuches, Tauftages, Zwölftes\\ \bottomrule \end{tabularx} \clearpage Here are the analogous compilations for Johann Wolfgang Goethe's \emph{Faust~I} and \emph{Faust~II}: \smallskip \noindent \begin{tabularx}{\textwidth}{@{}lX@{}} \midrule f-lig.\ & Words for which the f-lig.\ is suppressed by \pkg{selnolig}'s rules\\ \midrule \multicolumn{2}{@{}l@{}}{\emph{Faust I} (30 words total)}\\ \uselig{fb} & aufbewahrt, aufbindend, Schiffbruchs, Strumpfband, tiefbewegte\\ \uselig{fh} & fünfhundert\\ fl & hilflos, höflich, Höflichkeit~(3), Unhöfliches, Verzweiflung~(2), teuflisch, teuflischen~(2), unbegreiflich~(3), würfle\\ ffl & Schifflein, trefflich~(4), treffliche, trefflichen, trefflicher\\ ft & Auftrag\\[2ex] \multicolumn{2}{@{}l@{}}{\emph{Faust II} (31 words total)}\\ \uselig{fb} & Aufbau, aufbewahrt, aufblühender, Raufbold, Ungreifbarer\\ \uselig{fh} & unaufhaltsam~(2)\\ fl & flehn [Type~II error!], fünften, greiflich, höflich~(2), Liebesbrieflein, Scherflein, schimpflich, sträflich, Tiefauflauerndes, Tröpflein, überteuflisch, unbegreifliche, unbehülflich, Verwerfliche, Verzweiflung, Zweifler, verwerflich, verzweiflend, zweiflen\\ ffl & trefflich\\ ft & Auftrag, Auftretend, Fünftausend\\ \bottomrule \end{tabularx} } % end of \RaggedRight \medskip Without meaning to comment in any way on the literary merits of these works, I find it interesting that Goethe's two pieces contain far fewer instances of words for which ligatures need to be broken up than does \emph{Die Buddenbrooks}. 
I also find it mildly interesting that no \opt{fj} and \opt{fk} instances are present in the two Goethe pieces. Also at least somewhat interestingly, the noun \enquote{Verzweiflung} occurs 23 times in \emph{Die Buddenbrooks} but only 3~times across both parts of \emph{Faust}. Note the Type~II error, \emph{viz.}, the word \enquote{flehn} (better: \uselig{flehn}), in the listing for \emph{Faust~II}. This error (which I've deliberately not removed) serves to illustrate\textemdash just in case this wasn't already clear\textemdash that \pkg{selnolig} is not and cannot ever be entirely error-free. If you use the package in your work, I recommend that you compile your document with \cmmd{debugon} turned on from time to time. Doing so will write the results of \pkg{selnolig}'s work to the |.log| file. Then, examine the log file, either directly or via a machine search for lines containing the string \enquote{ligature suppression}. (This is, by the way, precisely how I started putting together the word lists shown above.) Any Type~II errors should be fairly easy to spot this way. To fix them, you can edit your file and encase the words in question in \cmmd{uselig} instructions, or you can create new \cmmd{keeplig} instructions to override the action of the offending \cmmd{nolig} instructions. When creating new \cmmd{keeplig} rules, do take care not to introduce new Type~I errors in the process. For instance, don't create the rule \Verb+\keeplig{flehn}+, as doing so would cause failures to break up the fl ligatures in, \emph{inter alia}, auflehnen, Auto\-kopflehne, Stofflehne, and steiflehnig. (Actually, since these particular words don't occur in \emph{Faust~II}, you'd be \enquote{safe} with \Verb+\keeplig{flehn}+ as long as \emph{Faust~II} is the only document you need to typeset\dots) It's important to keep in mind that the lists shown above contain (i) \pkg{selnolig}'s \enquote{success stories} and (ii) any Type~II errors. 
By design, they cannot show the package's Type~I errors, \emph{viz.}, failures to break up ligatures that span morpheme boundaries. To catch any Type~I errors, I'm afraid you'll have to read your output file and scan it for inappropriate ligatures. (Hopefully, there won't be too many such instances!) To remedy any Type~I errors, you should either insert \cmmd{breaklig} instructions in the words in question or devise new \cmmd{nolig} instructions\textemdash whatever works best for you. A final plea: Please feel free to inform me about any Type~I and/or Type~II errors you believe are generated by the \pkg{selnolig} package. My email address is stated on the front page of this user guide. Happy \TeX ing! \end{document}