% If the \XeTeXinputencoding primitive exists (i.e. the control sequence
% is not \relax after \csname), declare the input encoding of this file
% as cp1252; on any other engine nothing is done.
\expandafter\ifx\csname XeTeXinputencoding\endcsname\relax
\else
  \XeTeXinputencoding=cp1252
\fi
% Zero-padded month and day used to build the file date.  They are kept
% in dedicated macros so that \month and \day remain count registers for
% every class and package loaded afterwards (redefining them as macros
% breaks any later \the\month or \month=... assignment).
\edef\DocMonth{\ifnum\month<10 0\fi\the\month}%
\edef\DocDay{\ifnum\day<10 0\fi\the\day}%
\documentclass[article(11pt),%
% produce,% Uncomment this line to produce xesearch.sty
index]{codedoc}

% CodeDoc declaration: file name, package name, version and date
% (year/month/day, with month and day padded to two digits).
\ProduceFile{xesearch.sty}[xesearch][v.0.2][\the\year/\DocMonth/\DocDay]


% Fonts
% Main text font: Palatino Linotype with old-style figures, common
% ligatures and the tex-text mapping.
\usepackage{xltxtra}
\setmainfont[Numbers=OldStyle,Ligatures=Common,Mapping=tex-text]{Palatino Linotype}
\makeatletter
% Old style numbers won't climb into
% footnote status: the footnote mark is typeset inside a group that
% selects Palatino Linotype again, this time without the
% Numbers=OldStyle feature, and then prints \@thefnmark as a
% superscript.
\def\@makefnmark{%
  \bgroup
  \fontspec[Ligatures=Common,Mapping=tex-text]{Palatino Linotype}%
  \textsuperscript{\@thefnmark}%
  \egroup
  }
\makeatother
% Sans-serif font (Verdana) and the monospaced family \altmono
% (Excalibur Monospace), both scaled to 85%.
\setsansfont[Scale=.85]{Verdana}
\newfontfamily\altmono[Scale=.85]{Excalibur Monospace}
% \ttslant{<text>}: <text> in the monospaced font with a fake slant,
% inside a group so that the font change stays local.
\def\ttslant#1{%
  {\fontspec[FakeSlant=.2,Scale=.85]{Excalibur Monospace}#1}%
  }
% \texttt is redefined to switch to \altmono inside a group.
\def\texttt#1{{\altmono#1}}


% Here it is.
% The documentation uses xesearch on itself: the starred (case-sensitive)
% list `logos' replaces every word ending in `TeX' or `ConTeXt', and the
% word `xesearch', by the control sequence of the same name
% (\csname<word>\endcsname).
\usepackage{xesearch}
\SearchList*{logos}{\csname#1\endcsname}{?TeX,?ConTeXt,xesearch}
% Logo produced when the list above matches `ConTeXt'.
\def\ConTeXt{Con\TeX t}
% The \Xe must be redefined
% because it's ugly in Verdana.
% \xesearch is the logo produced when the `logos' list matches the word
% `xesearch': a sans-serif X, a lowered and mirrored E, then `Search',
% with small negative kerns around the E.
\def\xesearch{%
  \textsf{%
    X%
    \kern-.12em
    \lower.2em\hbox{\reflectbox{E}}%
    \kern-.03em Search%
    }%
  }
% \Protect{<text>}: typesets <text> with searching switched off
% (\StopSearching before, \StartSearching after), so that no list is
% applied to it.
\def\Protect#1{%
  \StopSearching
  #1%
  \StartSearching{}%
  }


% Page dimensions and colors.
% The page is 31.7cm wide and 21cm high with 2cm margins on all sides.
% The page style is empty; the running header is supplied by the
% `header' dynamic frame declared with the other frames.
\usepackage[paperheight=21cm,paperwidth=31.7cm,left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\usepackage{xcolor}
\pagestyle{empty}

% For the implementation.
\usepackage{parcolumns}

% One wonderful package.
\usepackage{flowfram}
% Page numbers that delimit the parts of the document.  With the values
% below: \startpage=3, \endpage=3+15=18, \codepage=18+1=19 and
% \indexpage=19+32=51.
\newcount\startpage\startpage3
\newcount\endpage \endpage\startpage \advance\endpage15
\newcount\codepage \codepage\endpage \advance\codepage1
\newcount\indexpage \indexpage\codepage \advance\indexpage32
% Earlier hard-coded variant of the registers above, kept for reference.
%\def\startpage{3}
%\def\endpage{18}
%\def\indexpage{51}
%
% Flow frames (main text), in the form [pages]{width}{height}{x}{y}[label]:
%  - title:     page 1, as large as the paper and shifted by -2cm in
%               both directions;
%  - main:      pages 2 to \endpage, 15cm wide;
%  - maincode:  pages \codepage to \indexpage, 27.7cm wide;
%  - index1-5:  pages after \indexpage, five 5cm columns placed 5.5cm
%               apart.
\newflowframe[1]{31.7cm}{21cm}{-2cm}{-2cm}[title]
\newflowframe[2-\endpage]{15cm}{17cm}{0cm}{0cm}[main]
\newflowframe[\codepage-\indexpage]{27.7cm}{17cm}{0cm}{0cm}[maincode]
\newflowframe[>\indexpage]{5cm}{17cm}{0cm}{0cm}[index1]
\newflowframe[>\indexpage]{5cm}{17cm}{5.5cm}{0cm}[index2]
\newflowframe[>\indexpage]{5cm}{17cm}{11cm}{0cm}[index3]
\newflowframe[>\indexpage]{5cm}{17cm}{16.5cm}{0cm}[index4]
\newflowframe[>\indexpage]{5cm}{17cm}{22cm}{0cm}[index5]
% Color used for code input in the examples and for described macros.
\def\CodeColor{black!60!red}
%
% Dynamic frame holding the table of contents on page 1.
\newdynamicframe[1]{11.7cm}{10cm}{16cm}{0cm}[toc]
%
% Dynamic frame on the right of the main text (pages \startpage to
% \endpage) that receives the examples; it has a pale yellow background
% and is cleared after each page (clear=true).
\newdynamicframe[\startpage-\endpage]{11.7cm}{17cm}{16cm}{0cm}[exblock]
\setdynamicframe*{exblock}{clear=true,backcolor={yellow!14!white}}
%
% One-line running header on every page after the first.
\newdynamicframe[>1]{5cm}{\baselineskip}{24cm}{18cm}[header]
\setdynamiccontents*{header}{\fontspec{Palatino Linotype}\itshape\Xe Search user guide\ \textbullet\ \thepage}
%
% Background strips (flowfram's \vtwotone and \vNtone): a pale yellow
% area plus a .1cm dark red strip outside the code pages, and three
% strips (12.2cm, \paperwidth-12.3cm, .1cm) on the code pages.
% NOTE(review): exact placement follows flowfram's conventions -- confirm
% against its manual before changing any width.
\vtwotone[<\codepage,>\indexpage]{\paperwidth-.1cm}{yellow!11!white}{wW}{.1cm}{red!80!black}{rR}
\vNtone[\codepage-\indexpage]{3}{12.2cm}{yellow!11!white}{wWw}{\paperwidth-12.3cm}{yellow!14!white}{cCc}{.1cm}{red!80!black}{rRr}
%
%


% Registers and switch used by \ranletter to scatter random letters
% over the title page.
\newcount\lettercount
\newcount\casecount
\newcount\rancolor
\newcount\ranangle
\newdimen\ranwidth
\newdimen\ranheight
\newdimen\ranWidth
\newdimen\ranHeight
\newif\iftoc
\newbox\letterbox

% \ranletter typesets at most one letter with a random identity, case,
% position, shade and rotation, in a box whose width, height and depth
% are forced to zero so that the current position does not move.
% \setrannum and \setrandim are not defined in this file; presumably
% they come from random.tex, which is \input just before \generate is
% called.
\def\ranletter{%
  % Letter index (1 to 26) and case selector (0 or 1).
  \setrannum\lettercount{1}{26}%
  \setrannum\casecount{0}{1}%
  % Two-stage draw for each coordinate: first an upper bound, then the
  % actual offset between 0pt and that bound.
  \setrandim\ranWidth{0em}{45cm}%
  \setrandim\ranwidth{0pt}{\ranWidth}%
  \setrandim\ranHeight{0em}{25cm}%
  \setrandim\ranheight{0pt}{\ranHeight}%
  % Percentage used in the color mix, and rotation angle.
  \setrannum\rancolor{0}{100}%
  \setrannum\ranangle{0}{360}%
  \tocfalse
  \advance\ranwidth-1cm
  \advance\ranheight-1cm
  % \iftoc becomes true, and the letter is dropped, when the offset is
  % beyond 40cm horizontally, beyond 30cm vertically, or beyond both
  % 17cm horizontally and 8.5cm vertically (presumably the area of the
  % `toc' frame -- TODO confirm).
  \ifdim\ranwidth>40cm
    \toctrue
  \else
    \ifdim\ranheight>30cm
      \toctrue
    \else
      \ifdim\ranwidth>17cm
        \ifdim\ranheight>8.5cm
          \toctrue
        \fi
      \fi
    \fi
  \fi
  \unless\iftoc
    % The letter is lowered by \ranheight and shifted right by
    % \ranwidth; \@alph or \@Alph (chosen by \casecount) turns
    % \lettercount into a lowercase or an uppercase letter.
    \setbox\letterbox=\hbox{%
      \lower\ranheight\hbox{%
        \kern\ranwidth
        \color{yellow!11!white!\the\rancolor!black}%
        \rotatebox{\the\ranangle}{\csname @\ifcase\casecount a\else A\fi lph\endcsname\lettercount}%
        }
      }%
    % Make the box dimensionless before using it.
    \wd\letterbox0cm \ht\letterbox0cm \dp\letterbox0cm
    \box\letterbox
  \fi
  }

% \generate calls \ranletter over and over: one call per iteration while
% \generator is below 10000, plus a final call once the limit has been
% reached.  The loop is a tail call through \next, so no conditional is
% left open when the macro calls itself again.
\newcount\generator
\def\generate{%
  \ranletter
  \ifnum\generator<10000
    \advance\generator1
    \let\next\generate
  \else
    \let\next\relax
  \fi
  \next
  }


% CodeDoc verbatim environments.
% The double quote is declared as CodeDoc's short-verbatim character;
% the body of the guide writes inline code between double quotes.
\ShortVerb"
%
% \AppendEx<material>\EOE appends <material> to the contents of the
% `exblock' dynamic frame (the frame on the right of the main text),
% preceded by a \rightskip of .3cm.  The argument is delimited by \EOE
% and may contain \par (\long).
\long\def\AppendEx#1\EOE{%
  \appenddynamiccontents*{exblock}{%
    \rightskip.3cm
    #1}
  }
%
% Create the title and label of
% the example.
% \extitle{<label>}{<title>} steps the example counter \excount, makes
% its new value the current label text, sets \label{<label>} (so that
% \ref{<label>} yields the example number) and stores <title> in
% \ExTitle for the caption built by \ExampleBox.
\makeatletter
\long\def\extitle#1#2{%
  \advance\excount1
  \def\@currentlabel{\the\excount}%
  \label{#1}%
  \def\ExTitle{#2}%
  }
\makeatother
%
%
% Put the final product in a box.
% I need a new box for each example
% on the same page, so I simply
% create a new box for each example.
%
% \ExampleBox opens the box of the current example; the group it opens
% with \bgroup is closed by the \egroup at the start of \MakeBox.
\def\ExampleBox{%
  % Allocate a box register named box@<example number>.
  \expandafter\newbox\csname box@\the\excount\endcsname
  % Build the caption now: the example number and \ExTitle are expanded
  % by \edef, the formatting commands are kept as they are (\noexpand).
   \edef\excaption{
     \noexpand\hfil\noexpand\bfseries\noexpand\scshape
     \noexpand\normalsize
     Example \the\excount: \ExTitle}%
  % Start filling the box globally; everything up to the \egroup in
  % \MakeBox goes into this \vbox.
   \global\expandafter\setbox\csname box@\the\excount\endcsname=\vbox\bgroup
  % Unpack the sub-examples collected by preceding sidex2 environments.
  % \subexbox holds their number; only the values 1, 2 and 3 are handled,
  % any other value unpacks nothing.
  \ifcase\subexbox
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
    \expandafter\unvbox\csname subbox@2\endcsname
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
    \expandafter\unvbox\csname subbox@2\endcsname
    \expandafter\unvbox\csname subbox@3\endcsname
  \fi
  % No sub-example is pending any more.
  \global\subexbox0
  }
%
%
% Layout of the example.
% \inexample{<input>}{<output>} switches the `logos' list off, selects
% \small type on a measure of 11.5cm without paragraph indentation, then
% typesets <input> in \CodeColor with a left margin of .4cm and, after a
% .2cm skip, <output> with a left margin of .1cm.
\def\inexample#1#2{%
   \StopList{logos}%
   \small
   \hsize11.5cm
   \parindent0pt
   \leftskip.4cm
   \vskip.3cm
   \textcolor{\CodeColor}{#1}%
   \leftskip.1cm
   \vskip.2cm
   #2%
  }

% \MakeBox closes the \vbox opened by \ExampleBox (\egroup) and appends
% \vfil followed by the unpacked box of the current example to the
% `exblock' frame.  The \expandafter chain builds the box name from the
% current value of \excount before \AppendEx reads its argument.
\def\MakeBox{%
  \egroup
  \expandafter\AppendEx\expandafter\vfil\expandafter\unvbox\csname box@\the\excount\endcsname\EOE
  }
%
% The example that is typeset in
% the right frame.
% \excount numbers the examples; it is stepped by \extitle.
% The sidex environment puts the code (\CodeInput, in \altmono) and its
% result (\CodeOutput) in the box of the current example, followed by
% the caption \excaption, and sends the box to the right frame with
% \MakeBox.
\newcount\excount
\NewExample{sidex}%
  {\altmono#}{#}%
  {%
   \ExampleBox
     \inexample{\CodeInput}{\CodeOutput}%
     \vskip.4cm
     \excaption
   \MakeBox
  }
%
% This one also goes in the
% right frame but under the
% same heading as the following
% sidex example.
% \subexbox counts the sub-examples waiting for the next sidex; each one
% is stored globally in a box named subbox@<n>, which \ExampleBox
% unpacks (up to three of them) before resetting \subexbox to 0.
% NOTE(review): \newbox is executed on every use, so a new register is
% allocated even when subbox@<n> already exists -- confirm that this is
% acceptable for the number of examples in the document.
\newcount\subexbox
\NewExample{sidex2}%
  {\altmono#}{#}%
  {%
  \global\advance\subexbox1
  \expandafter\newbox\csname subbox@\the\subexbox\endcsname
  \expandafter\global\expandafter\setbox\csname subbox@\the\subexbox\endcsname=\vbox{%
     \inexample{\CodeInput}{\CodeOutput}%
     \vskip.1cm
     }%
  }
%
% Verbatim text in the main text:
% the mainex environment shows only the code (\CodeInput, in \altmono),
% in the main flow, with the `logos' list switched off, a left margin
% of twice \parindent and a stretchable .2cm skip above and below.
\NewExample{mainex}%
  {\par
  \altmono#}%
  {}{%
  \StopList{logos}%
  \vskip.2cm plus .1cm minus .1cm
  \leftskip2\parindent
  \CodeInput
  \vskip.2cm plus .1cm minus .1cm
  }
%
% Syntax highlighting examples only
% show the input code, with the desired
% specifications.
% The hilite environment is laid out like sidex but leaves the output
% part empty and selects \codefontspec before the code.
% NOTE(review): \codefontspec is not defined in this part of the file;
% presumably it is set before the environment is used -- confirm.
\NewExample{hilite}%
  {#}{}%
  {
   \ExampleBox
     \inexample{\codefontspec\CodeInput}{}%
     \vskip.4cm
     \excaption
  \MakeBox
  }
%
%
% Example with only \CodeOutput shown.
% The outputonly environment is laid out like sidex, but the code part
% passed to \inexample is empty, so only the result appears above the
% caption in the right frame.
\NewExample{outputonly}%
  {\altmono#}{#}%
  {%
   \ExampleBox
     \inexample{}{\CodeOutput}%
     \vskip.4cm
     \excaption
   \MakeBox
  }


% How the argument to \DescribeMacro
% is typeset.
% The switch \ifmacroalready is set to true in the text (with
% \macroalreadytrue) between two consecutive \DescribeMacro's; it
% suppresses the skip above the next description and is reset to false
% by each \PrintMacro.
\newif\ifmacroalready
\def\PrintMacro#1{%
  % Skip before the description, unless the previous item was already a
  % description or unless a positive skip is already there.
  \unless\ifmacroalready
    \unless\ifdim\lastskip>0pt
      \vskip.1cm plus .05cm minus .05cm
    \fi
  \fi
  \macroalreadyfalse
  % The description itself: bold, in \CodeColor, in Verdana at 90%, with
  % a bullet hanging in the left margin (\llap).  The \vskip ends the
  % line and the last \noindent starts the following text unindented;
  % the font change is confined to the group.
  {\fontspec[Scale=.9]{Verdana}%
  \noindent
  \textcolor{\CodeColor}{\textbf{\llap{\textbullet\ }#1}}%
  \vskip.1cm plus .05cm minus .05cm
  \noindent
  }}
% Font declaration passed to CodeDoc's \DescribeIndexFont (bold, dark
% red); presumably it applies to index entries of described macros.
\DescribeIndexFont{\color{red!80!black}\bfseries}

  
% Generic macros used in the examples.
% They are not part of xesearch; they only give format-independent names
% to the effects shown in the examples.
% Colors: each macro expands to \textcolor{<color>} and therefore takes
% the text to be colored as its argument.
\def\blue{\textcolor{blue}}
\def\red{\textcolor{red}}
\def\green{\textcolor{green}}
\def\violet{\textcolor{blue!50!red}}
% Bold and italics are plain aliases of the LaTeX commands.
\let\bold\textbf
\let\italics\textit
% \frame{<text>} boxes its argument; \reverse{<text>} mirrors it.
\def\frame#1{\fbox{#1}}
\def\reverse#1{\reflectbox{#1}}


% Another big one.
% Only the driver is given as a package option.  The keys whose values
% contain spaces (border, author, title) are set with \hypersetup,
% because LaTeX may remove the spaces from a package option list before
% hyperref reads it.
\usepackage[xetex]{hyperref}
\hypersetup{%
  pdfborder={0 0 0},%
  pdfauthor={Paul Isambert},%
  pdftitle={XeSearch user guide}%
  }


% Miscellanea.
% \rem starts a numbered remark: it steps \remcount, begins a new
% unindented paragraph and prints the number followed by a full stop and
% an unbreakable space.  Setting \remcount to 0 restarts the numbering.
\newcount\remcount
\def\rem{%
  \advance\remcount by 1
  \par
  \noindent
  \the\remcount.~%
  }
% \citex formats references to examples; it is an alias of \textbf.
\let\citex\textbf

\begin{document}

\makeatletter
% Table-of-contents line for sections: #1 is the entry text, #2 the page
% number.  Nothing is printed when tocdepth is 0 or less.  No vertical
% space is requested above the entry (\addvspace{0em}).  The entry is
% set in bold with the page number flush right in a box of width
% \@pnumwidth.
\renewcommand*\l@section[2]{%
  \ifnum \c@tocdepth >\z@
    \addpenalty\@secpenalty
    \addvspace{0em}%
    \setlength\@tempdima{1.5em}%
    \begingroup
      \parindent \z@ \rightskip \@pnumwidth
      \parfillskip -\@pnumwidth
      \leavevmode \bfseries
      % Indent continuation lines by \@tempdima while the first line
      % starts at the margin (the \hskip cancels \leftskip).
      \advance\leftskip\@tempdima
      \hskip -\leftskip
      #1\nobreak\hfil \nobreak\hb@xt@\@pnumwidth{\hss #2}\par
    \endgroup
  \fi}
\makeatother
% The table of contents goes to the `toc' dynamic frame, in
% \footnotesize and pushed to the bottom of the frame (\vfill).
\appenddynamiccontents*{toc}{\vfill\footnotesize\tableofcontents}

\StartIgnore
\IfFileExists{random.tex}
  {\input{random.tex}%
  \leavevmode\generate}
  {%
\leavevmode\par
You should have a nice explosion of letters here,\par
but you don't have Donald Arseneau's \texttt{random.tex}.\par
Download it from CTAN and run XeLaTeX again.}
\StopIgnore
\par
\hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\Xe Search}\kern.6em}\par\vskip2pt
\hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\FileVersion}\kern.6em}\par\vskip2pt
\hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\FileDate}\kern.6em}\par\vskip2pt
\hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{Paul Isambert}\kern.6em}\par\vskip2pt
\hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\ttfamily\textbf{zappathustra@free.fr}\kern.6em}

\framebreak

\setcounter{section}{-1}
\section{Changes}
% \Change{<date>}{<description>}: one entry of the change log.  The
% description is set as a paragraph with a left margin of 2cm and the
% date, in bold, hangs in that margin, an \enspace before the text.
\long\def\Change#1#2{%
  \leftskip=2cm\relax
  \leavevmode\llap{\textbf{#1}\enspace}%
  #2\par
  }

{%
\parindent0pt
\Change{\FileDate}
{%
Corrected for new versions of the XeTeX executable (thanks to Yuri Robbers):\par
The XeTeX executable now has an increased number of character classes to 4096. This 
has been accommodated by also increasing xesearch's upper bound from 255 to 4095. Not
changing xesearch would result in an error. For compatibility with other versions of
the executable, this solution has been implemented using system primitives rather than 
hard coded numbers.
}
\Change{2009/11/04}
{%
v.0.1 Corrected for ConTeXt (thanks to Wolfgang Schuster):\par
Now there's a third party file, \texttt{t-xesearch.tex}, so that
xesearch can be properly loaded with \texttt{\bslash usemodule[xesearch]}.\par
The clash between ConTeXt's \texttt{\bslash unexpanded} macro and XeTeX's
(actually $\varepsilon$-TeX's) \texttt{\bslash unexpanded} primitive has been fixed.}
\Change{2009/10/24}{Initial version}
}  

\framebreak

\strut
\vfill
\section{Introductory remarks}

\rem This set of macros requires the XeTeX engine.

\rem This set of macros is totally experimental.

\rem This set of macros is written with plain XeTeX, and so
it should be compatible with all formats, at least if
they implement such basic macros as "\newcount" or "\newif",
which is the case at least for LaTeX and ConTeXt.

\rem As a consequence of the preceding remark, I've used
in the examples of this documentation control sequences
that don't exist in any format (as far as I know) but whose
meaning is transparent enough, like "\blue" or "\italics", which
typeset \blue{blue} and \italics{italics}. They are not part of xesearch.

\rem This set of macros tweaks XeTeX's character class
mechanism badly. This mechanism was not designed to do what
it does here. Anyway, since it is used mainly for non-alphabetical
writing systems, there's little chance of clashing with xesearch.
I have tried to make xesearch compatible
with François Charette's \textsf{polyglossia} for languages with
special punctuation patterns, like French. I have not tried
to patch \textsf{babel} German shorthands in \textsf{polyglossia},
simply because I was not able to make them work.

\rem xesearch is local all the way down, that is, there's
not a single global command. So it can be used in a controlled way.%
\footnote{If your knowledge of TeX is confined to LaTeX, you might
not be very familiar with the notion of locality to groups, since
in LaTeX pretty much everything is global by default, whereas in
plain TeX the contrary holds. So to make things simple, just remember
that if you use xesearch inside a LaTeX environment, even one you've
defined yourself with \texttt{\bslash newenvironment}, nothing
will spread outside this environment. (I don't know the situation
for ConTeXt, so I won't say anything.)}

\rem To see what xesearch does, see \citex{example~\ref{simple}}
 on the right.

\extitle{simple}{A Simple Example}
\begin{sidex}
\SearchList{color}{\csname#1\endcsname{#1}}{blue,red,green}
This is blue and this is red and this is green,
but apparently yellow was not defined.
\end{sidex}

\rem To load the package in LaTeX, say
%
\begin{mainex}
\usepackage{xesearch}
\end{mainex}
%
\noindent In ConTeXt:
%
\begin{mainex}
\usemodule[xesearch]
\end{mainex}
%
\noindent In plain XeTeX:
%
\begin{mainex}
\input xesearch.sty
\end{mainex}


\clearpage
\section{Let's search}


\DescribeMacro{\SearchList\meta{*!}\marg{name}\marg{replacement text}\marg{list of words}}%
The star and exclamation mark are optional and their relative
order does not matter. Sticking to mandatory arguments for the moment,
here's how this macro works: first, you give
a \meta{name} to this list, for further reference. Then you specify
the \meta{replacement text}, which will be substituted for all of
the words in \meta{list of words} (separated by commas). In this
\meta{replacement text}, the substituted word is designated by "#1",
so just think about it as an argument to a control sequence. 
If you forget "#1", the word disappears (until we learn how to 
use the exclamation mark), as can be seen in \citex{example~\ref{vanish}}.

\extitle{vanish}{Words As Arguments}
\begin{sidex}
\SearchList{list1}{\italics{#1}}{obviously}
\SearchList{list2}{}{something}
Obviously, I have forgotten something.
\end{sidex}

Note that there's still a space between
\emph{forgotten} and the full stop. Where does it come from? Well, it
is the space that was between \emph{forgotten} and \emph{something}. At
the time when xesearch manipulates \emph{something}, this space has already
been read and typeset, so it does not disappear.

But there's something much more interesting in this example. As you might
have noticed, the first line says:
%
\begin{mainex}
\SearchList{list1}{\italics{#1}}{obviously}
\end{mainex}
%
\noindent and in the text to be searched we find `\texttt{Obviously}',
with an uppercase first letter. Nonetheless, it is found and
modified according to the replacement text. We thus discover one basic
principle of xesearch: \emph{it isn't case-sensitive by default}.
Hence the two following lists

\VerbCommand!()
%
\begin{mainex}
\SearchList{list1}{!ttslant(<whatever>)}{word}
\SearchList{list2}{!ttslant(<whatever>)}{Word}
\end{mainex}
%
%
\UndoVerbCommand
\noindent will find exactly the same set of words, namely `\texttt{word}',
`\texttt{Word}', `\texttt{woRd}', `\texttt{WORD}', etc.
How scary. This isn't customary in good programming and in TeX in
particular. Fortunately, this default setting can be easily changed:
the optional star just after "\SearchList" will make the entire list
case-sensitive. Besides, if a list is not case-sensitive, i.e. if it
has no star, a star before a word in that list will make the search
for that particular word case-sensitive.%
\footnote{However, if \texttt{\string\SearchList} is suffixed with
a star, all words in the list will be case-sensitive.}
This is illustrated in \citex{example~\ref{case}}.

\extitle{case}{Illustrating Case-Sensitivity}
\begin{sidex2}
\SearchList{Case insensitive}{\blue{#1}}{woRd}
Word word woRd WORD
\StopList{Case insensitive}
\end{sidex2}

\begin{sidex2}
\SearchList*{Case sensitive}{\red{#1}}{word}
Word word woRd WORD
\StopList{Case sensitive}
\end{sidex2}

\begin{sidex}
\SearchList{Mixed}{\green{#1}}{word,*Worm}
Word word woRd WORD\par
Worm worm woRm WORM\par
\end{sidex}

In this example we discover another macro, whose meaning is clear:


\DescribeMacro{\StopList\marg{list of lists}}
The lists, separated by commas, are turned off.

Let's turn back to "\SearchList" again. It can also take an exclamation
mark beside the star (the order between the two of them is not important).
In this case, the word is not substituted anymore; i.e. the
replacement text will follow the word (still with "#1" standing for it).
These concatenating replacements
are very dangerous because they are expanded \emph{after} the search
has started again. You see what I mean: if the word you've found
does not endure some transformation that'll make it different from
itself as far as typesetting is concerned, ooops, here's the loop.
"WORD" expands to "WORD\command{WORD}" to "WORD\command{WORD\command{WORD}}",
etc., and there's no way out of it.

So, what's the point? The point is: the reason why those replacements
are placed after the no-search area has stopped is because they are
meant to host argument-taking commands to act on the rest of the stream.
Such commands can't be placed in normal replacement texts without
an exclamation mark, because they would stumble upon precisely
what starts the search again. So be careful. Either use "!"-marked
searches with non-typesetting macros, for instance to index the word,
or make sure that you know exactly the many interactions you might
create. The exclamation mark says it all.
\citex{Example~\ref{exclam}} is silly but I hope you can see the point.


\extitle{exclam}{A Silly One}
\begin{sidex2}
\SearchList*!{Hamlet}%
             { Or Not \StopSearching#1\StartSearching}%
             {To Be}
To Be...
\end{sidex2}

\begin{sidex2}
\SearchList!{typo}{\red{!!!}}{tipo}
There's a tipo here.
\end{sidex2}

\begin{sidex}
\SearchList!{XeTeX}{ \green}{is}
This is \XeTeX.\par
\end{sidex}

Note the space at the beginning of the first and third replacement
texts. Concatenating replacement texts (which replace nothing but
whatever) stick to their targets. Besides, in the third example,
"\green" would have gobbled the subsequent space.

I hope you have noticed that the "Hamlet" list contains not a word but
a phrase. So you know: xesearch can find phrases. Now we can't avoid
going into a little more detail concerning the way xesearch works.
But before that, let's see one simple macro:

\DescribeMacro{\AddToList\meta{*!}\marg{name}\marg{list of words}}
This adds the words to the \meta{name} list, which of course should
already exist. The presence or absence of a star and/or an
exclamation mark doesn't depend at all on the original list. You
can see that in \citex{example~\ref{add}}.

\extitle{add}{Adding Words To An Existing List\par\hfil(Another Silly One)}
\begin{sidex}
\SearchList{Stupid list}{\blue{#1}}{word}
Word and beep.
\AddToList*{Stupid list}{Beep}
Or Beep and word and beep.
\end{sidex}

Finally, the words in "\SearchList" and "\AddToList" should
be made of characters only, but these can be the product of
expansion. For instance, if you have "\def\word{a word}",
then you can say "\AddToList{mylist}{\word}". If anything
else shows up xesearch won't accept the word (and you'll
probably have a good deal of errors beforehand).


\section{What xesearch looks for and how it finds it}

xesearch can see only two things: letters and non-letters. Non-letters
it doesn't like because it's then forced to spit the letters it has
gathered and form a word, and most times it's not allowed to take it
away. (Un)fortunately, xesearch is quite short-sighted: it considers letters
what you tell it are not non-letters (xesearch apparently has some formal
education in philosophy).

More seriously (and clearly), xesearch forms a word as long as there are
letters. As you can see in \citex{example~\ref{macro}}, macros are expanded
and if they yield letters, xesearch can recognize a word.
%
\extitle{macro}{Macros Can't Hide Letters}%
\begin{sidex}
\SearchList{Will it find me?}{\blue{#1}}{word}
\def\rd{rd}
Here is a wo\rd.
\end{sidex}%
%
So when does it stop searching? There are two main cases:

\remcount0
\rem It encounters a space, or any primitive control sequence. The former
case is quite natural: you want spaces to delimit words (including "\skip"s
and associates). But the latter is less obvious: as soon as TeX does something
that is not typesetting letters, xesearch gives up. And this includes something
as seemingly innocuous as a "\relax", as you can see in \citex{example~\ref{relax}}.
That's the reason why, for instance, xesearch will never find \Protect{\texttt{TeX}
in \texttt{\bslash TeX}}: the definition contains many operations that aren't
strictly speaking putting letters in the stream. Fortunately, the bulk of
a manuscript is made of letters and spaces, and one seldom inserts "\relax"es
in the middle of words.

\extitle{relax}{But Primitives Can}
\begin{sidex}
\SearchList{This time I'm prepared}{\blue{#1}}{word}
\def\rd{\relax rd}
Here is a wo\rd.
\end{sidex}
%

\rem xesearch encounters a character that you've declared as a non-letter,
that is a word boundary. This leads us to the following macro:

\DescribeMacro{\MakeBoundary\marg{characters}}\macroalreadytrue
\DescribeMacro{\UndoBoundary\marg{characters}}
The characters should be simply put one after the other, as in for instance
\begin{mainex}
\MakeBoundary{,;:!}
\UndoBoundary{?()\{\}}
\end{mainex}
\noindent The basic set of such characters is as follows%
\footnote{That is: full stop, comma, semi-colon, colon, exclamation
mark, question mark, dash, inverted comma, apostrophe (i.e.
left and right quote), parentheses, brackets, curly braces. This
is rather arbitrary, despite some basic sensible assumptions.}
%
\begin{mainex}
.,;:!?-`'()[]{}
\end{mainex}
%
\noindent Now, if xesearch encounters a character that you've made
into a boundary, it will stop forming a word and evaluate what
it has gathered. Conversely, such characters cannot appear in
the list of words in "\SearchList"; they wouldn't be found anyway.
This is illustrated in \citex{example~\ref{boundary}}.
%
\extitle{boundary}{Where Words Start And Stop}
\begin{sidex2}
\MakeBoundary{/}
\SearchList{separated}{\ddag#1\ddag}{waka,jawaka}
Waka/Jawaka
\end{sidex2}

\begin{sidex}
\UndoBoundary{/}
\SearchList{united}{\ddag#1\ddag}{waka/jawaka}
Waka/Jawaka
\end{sidex}

There is one big difference between those two cases. Characters
defined as boundaries are not only word boundaries but also phrase
boundaries. If xesearch smells a possible phrase, spaces and
primitive commands won't stop it, whereas boundary characters will.
You can see that in \citex{example~\ref{phrase}}.
This example also illustrates one fact and one sad truth. The fact
is that words aren't searched for inside phrases; so the first two
\emph{you}'s were not turned to italics, since they belonged to
\emph{you are what you is}. The third one, on the other hand,
was recognized since \emph{you are neither good nor bad} was
missed because of the intervening comma.


\extitle{phrase}{Phrases And Words}
\begin{sidex}
\SearchList{word}{\italics{#1}}{you}
\SearchList{phrases}{\red{#1}}
                    {you are what you is,
                     you are neither good nor bad}

You are what\kern1cm % What a kern!
you is but you are neither good, nor bad.
\end{sidex}

The sad truth is that the "\kern" disappeared. This is one
shortcoming of xesearch: primitives disappear
when they're in the middle of a possible phrase, even if that
phrase is not recognized in the end. By `possible phrase' I
mean a string of words that form the beginning of a phrase
that you want identified, e.g. the kern in

\VerbCommand![]
\begin{mainex}
\SearchList{H(a)unting primitives}{!ttslant[<whatever>]}%
           {xesearch feeds on kerns}
xesearch feeds on\kern1cm skips
\end{mainex}
\UndoVerbCommand

\noindent will disappear, even though no string
matches in the end. Hopefully such commands are
rather rare in the bulk of a document. If some
are unavoidable --- and for other uses too ---
there exists a pair of commands, whose function I
probably don't need to explain (except that "\StartSearching"
doesn't need to be issued at the beginning of your
document, it is there by default):

\DescribeMacro\StartSearching\macroalreadytrue
\DescribeMacro\StopSearching


\vskip-\lastskip
\section{(A very blunt form of) regular expressions}

Words are cool, and phrases too. But life doesn't always
reach their level of achievement. Sometimes you don't know
what you want. Something beginning with a `B', why not?
or maybe something that ends in `et'? Then look at
\citex{example~\ref{affixes}}.

\extitle{affixes}{Prefixes And Suffixes}
\begin{sidex}
\SearchList{Affixes}{\red{#1}}{*B?,?et,?ET}

A \italics{Black Page} in B, actually some kind of
duet for Terry Bozzio and Chad Wackerman, lay
on the drumset beside the PET facility.
\end{sidex}

There are several things to see in this example. First,
xesearch has entered the "\italics" command and imposed
its will.\footnote{Provided I'm using commands that don't
cancel each other, like plain TeX's \texttt{\bslash bf}
and \texttt{\bslash it}.} Next, affixes\footnote{I use the
word \emph{affixes} to refer to both \emph{prefixes}
(like \texttt{B?}) and \emph{suffixes} (like \texttt{?et}).
From a linguistic point of view, prefixes and suffixes
(and infixes, actually) are indeed affixes, but from the
same point of view, what we're talking about here has nothing
to do with prefixes or suffixes, just with bits of words.
I hope you don't mind.} are also sensitive
to case-sensitivity, so to speak, since \emph{beside} was
not identified ("*B?" being case-sensitive), whereas \emph{PET}
was found ("?et" not being case-sensitive). Note that a
word matches an affix search if it is at least as long
as the specified part of the affix. Thus, \emph{B} matches
"B?". So the question mark means `from zero to any number
of additional letters,' and  not `at least one additional letter.'

Phrases can take only suffixes, and they affect the last
word only. So

\VerbCommand!()
\begin{mainex}
\SearchList{list}{!ttslant(<whatever>)}{some interesting wor?}
\end{mainex}
\UndoVerbCommand

\noindent will find \texttt{some interesting world}, \texttt{some interesting words},
but not \texttt{some interesting word thesaurus}. An affix mark
anywhere else will have no effect.

Marking the unspecified part of a word with "?" is the only
possibility for the question mark to enter a "\SearchList",
and obviously it doesn't stand for itself. So, unless of 
course you undo it as a string boundary, "?" can appear
only at the beginning or the end of a word.\footnote{And if
a star is present, it should precede the question mark.}
In any other place, it will be recognized as a boundary
that has no right to be there and you'll be blamed. This
means that infixes don't exist in xesearch, i.e. you
can't say "B?et" to search for "bullet", for instance. Also,
you can't say "?ull?" to match "bullet". One affix at a time.

Finally, don't try to use a joker, i.e.

\VerbCommand!()
\begin{mainex}
\SearchList{list}{!ttslant(<whatever>)}{?}
\end{mainex}
\UndoVerbCommand

\noindent as an attempt to match all words. This won't work.%
\footnote{If you want to match all words\par
\noindent\texttt{\bslash SearchList\{list\}\{\ttslant{<whatever>}\}\{a?,b?,...,z?\}}\par
\noindent should do. Ok, now you've read it, you might have
the impression that the title of this section verges
on dishonesty. You might be right.}


\section{Search order(s)}

Now we shall see what happens when a word is matched by
several searches. There are three different cases:

\remcount0
\rem A word is matched by two or more strictly identical searches,
e.g.:
\VerbCommand!()
\begin{mainex}
\SearchList{list1}{!ttslant(<whatever>)}{word}
\SearchList{list2}{!ttslant(<whatever else>)}{word}
... word ...
\end{mainex}

\noindent\rem A word is matched by two or more prefixes
or two or more suffixes identical in case-sensitivity, e.g.:
\begin{mainex}
\SearchList{list1}{!ttslant(<whatever>)}{*wor?}
\SearchList{list2}{!ttslant(<whatever else>)}{*wo?}
... word ...
\end{mainex}

\noindent\rem A word is matched by two or more different
searches, e.g.:
\begin{mainex}
\SearchList{list1}{!ttslant(<whatever>)}{*wor?}
\SearchList{list2}{!ttslant(<whatever else>)}{word}
\SearchList{list3}{!ttslant(<anything>)}{?ord}
... word ...
\end{mainex}
\UndoVerbCommand

\vskip-\lastskip
\subsection{Strictly identical searches}
In this case, the word will execute all the
replacement texts. Their interactions depend
on the way they are defined: the replacement
texts that are defined without an exclamation
mark take as arguments the replacement texts
that are defined just before them and will themselves
become arguments to subsequent replacement texts.
See \citex{example~\ref{nested}}.

\extitle{nested}{Nested Replacement Texts}
\begin{sidex}
\SearchList{list1}{\blue{#1}}{blue word}
\SearchList{list2}{\dag#1\dag}{blue word}
\SearchList{list3}{\ddag#1\ddag}{blue word}

This blue word wears earrings and is equivalent
to \ddag\dag\blue{term}\dag\ddag.
\end{sidex}

If the replacement texts are defined with an exclamation
mark, they are simply concatenated, and most importantly,
their argument is the word itself alone, not the accumulation
of previous replacement texts. See \citex{example~\ref{concatenation}}.
Of course, if a word is matched by both kinds of replacement
texts, the same rules apply, as in \citex{example~\ref{both}},
where you can also be entertained by some not-very-fun-but-%
you-can-hopefully-see-the-point-again fiddling with !-marked
macros. If you want to know what those three "\expandafter"s
are doing here, see section~\ref{texnic}.


\extitle{concatenation}{Concatenation\par\hfil(Yet Another Silly Example)}
\begin{sidex}
\SearchList!{list1}{+}{wor?}
\SearchList!{list2}{\dag}{wor?}
\SearchList!{list3}{\ddag}{wor?}
This word is a freight train.
\end{sidex}


\extitle{both}{Everything Together\par\hfil(This Is Mind-Blowing)}
\begin{sidex}
\SearchList{list1}{\green{#1}}{*?ORD}
\SearchList{list2}{\ddag#1\ddag}{*?ORD}
\def\whisper#1{\italics{ (#1)}}
\def\ingreen{in green}
\SearchList!{list3}
            {\expandafter\expandafter\expandafter\whisper}
            {*?ORD}
\SearchList!{list4}{\ingreen}{*?ORD}
This WORD must be upset.
\end{sidex}


\subsection{Affixes with identical characteristics}

When a word is found by two or more affixes of the
same kind (i.e. only prefixes or only suffixes) and
with the same case-sensitivity, then you decide. xesearch
provides the following commands:

\DescribeMacro{\SortByLength\meta*\marg{pPsS}}\macroalreadytrue
\DescribeMacro{\DoNotSort\marg{pPsS}}\macroalreadytrue
\DescribeMacro{\SearchAll\marg{pPsS}}\macroalreadytrue
\DescribeMacro{\SearchOnlyOne\marg{pPsS}}
"p", "P", "s" and "S" are shorthands for (respectively)
`case-insensitive prefix', `case-sensitive prefix',
`case-insensitive suffix' and `case-sensitive suffix'.
They refer to the type of affix to modify and those commands
can take one or several of them, e.g. "\SearchAll{pSP}".
By default, affixes follow the same rules as full words:
each replacement text will take the replacement text
defined just before as argument.
But you can also create an order between them:
with "\SortByLength", longer affixes match words before
shorter ones, and their replacement texts are thus more
deeply nested; adding a star to "\SortByLength" reverses
the order: shorter affixes before longer ones. "\DoNotSort"
resets to default, i.e. replacement texts follow the order
in which they were defined. See \citex{example~\ref{affixorder}}.


\extitle{affixorder}{This Is Fascinating}
\begin{sidex}
\SearchList{Three letters}{\ddag#1\ddag}{*adv?}
\SearchList{Two letters}{\red{#1}}{*ad?}
\SearchList{Four letters}{\dag#1\dag}{*adve?}

\SortByLength{P} adverb
\SortByLength*{P} adverb
\DoNotSort{P} adverb
\end{sidex}

"\SearchAll" and "\SearchOnlyOne" set what should happen
when a word is matched by an affix: shall the search stop,
or shall xesearch continue to investigate whether other
affixes might fit too? By default, all affixes are
tested, but you might want a different behavior. Thus
"\SearchOnlyOne{PS}" will make case-sensitive prefixes and
suffixes search only once (and thus the order defined
just before becomes extremely important) while "\SearchAll{PS}"
will return to default, as illustrated in \citex{example~\ref{searchall}}.


\subsection{Different searches}

%
% The example is after the new section so it'll go on
% the right page.
%

\extitle{searchall}{This Guy Sure Ain't No David Foster Wallace}
\begin{sidex}
\SearchList{just a list}{\blue{#1}}{bl?,*bo?}
\SearchList{just another list}{\bold{#1}}{blu?,*bol?}

\SearchOnlyOne{P} Blue and bold and
\SortByLength{P} bold and blue.
\end{sidex}


Finally, we have to see what xesearch should do when several
searches match a word. Once again, you decide, thanks
to the following command:

\DescribeMacro{\SearchOrder\marg{order and inhibitions}}
You know what "p", "P", "s" and "S" mean; "f" and "F" mean
`case-insensitive full word' and `case-sensitive full word.'
In the macro above, \meta{order and inhibitions} is a list
of one or more sequences like
\texttt{f\red{!ps};} (with the semi-colon as part of the expression)
in which the red part is optional and which means: if a word
matches a full-word case-insensitive search, then xesearch
will not  test case-insensitive prefixes and suffixes on this
word. Such declarations are put one after the other, and this
defines the search order. For instance, the default order for
xesearch is:

\begin{mainex}
\SearchOrder{
  F!fPpSs;
  f!PpSs;
  P!pSs;
  p!Ss;
  S!s;
  s;
  }
\end{mainex}

\noindent and it simply means that full words should be searched for
before prefixes, and prefixes before suffixes, with case-sensitive
search first in each case, and that any
successful search inhibits any subsequent test. You can have
as many sequences as you wish. If XeTeX goes crazy and never terminates,
then you've probably forgotten a semi-colon (I do it very frequently).
See \citex{example~\ref{order}} for an illustration.

\extitle{order}{Search Order}
\begin{sidex}
\SearchList{word}{\green{#1}}{*Word}
\SearchList{prefix}{\frame{#1}}{wor?}
\SearchList{suffix}{\reverse{#1}}{?ord}

\SearchOrder{F;p;s;}
This Word is well-matched.

\SearchOrder{F!p;p;S;}
This Word is not so well-matched anymore.

\SearchOrder{f;}
This Word is not matched at all.
\end{sidex}

Remember that e.g. "word?" will find `"word"' as a prefix, not
as a full word, so that `"word"' will not be found if you say
for instance "\SearchList{list}{"\ttslant{<whatever>}"}{word?}"
and "\SearchOrder{f;}". Finally,
although something like "\SearchOrder{f;}" is perfectly okay
to search for case-insensitive full words only, 
"\SearchOrder{;}" will only make XeTeX crazy; "\StopSearching"
is simpler. 


\section{Some TeX\relax nical matters}\label{texnic}

This section is not vital to the comprehension of xesearch,
but it may be useful.

\DescribeMacro\PrefixFound\macroalreadytrue
\DescribeMacro\SuffixFound\macroalreadytrue
\DescribeMacro\AffixFound
When a word is found thanks to an affix search, the prefix
or suffix used is stored in the relevant macros. If there
are several matching affixes, the last prefix and the last
suffix win in their respective categories, and between them
the same rule applies for "\AffixFound". These macros are available
as long as the search has not started again, i.e. they're
fully available in normal replacement texts, but in
\texttt{!}-marked definitions they're erased as soon as
a letter is typeset, so they can be used only at the very
beginning. The rest of the time they are empty.

The affix itself respects the case in which it was declared
if it is case-sensitive, but it is in lowercase otherwise,
however it was fed to "\SearchList". See \citex{example~\ref{found}}.

\extitle{found}{Finding Affixes}
\begin{sidex2}
\SearchList{A case-sensitive suffix}{Suf\blue\SuffixFound}{*?FiX}
SufFiX.
\end{sidex2}
\begin{sidex}
\SearchList{A case-insensitive affix}{\blue\AffixFound fix}{Pre?}
PREfix.
\end{sidex}

\DescribeMacro\PatchOutput\macroalreadytrue
\DescribeMacro\NormalOutput
By default, xesearch doesn't patch the output routine so 
footers and headers are searched. This can be done by these
two commands. "\PatchOutput" should of course be issued
after any modification to the output routine. "\NormalOutput"
restores the value of the output routine at work when 
"\PatchOutput" was executed.

\DescribeMacro\PatchTracing\macroalreadytrue
\DescribeMacro\NormalTracing
If you want to give a look at your log file with some
tracing on, you will find hundreds if not thousands
of totally uninformative lines. That's xesearch
recursively discovering new letters and testing
words. With "\PatchTracing", xesearch will try to
keep quiet during those painful moments, i.e.
"\tracingcommands" and "\tracingmacros" will be
turned to zero. It can't
exactly be totally silent, so just know that all
its words begin with "xs@". "\NormalTracing"
lets xesearch express itself again.

Now just consider \citex{example~\ref{catcode}}. When
xesearch reads the input, it introduces itself to
all the letters it doesn't know. Most importantly,
it writes down some information about them, like
their catcode. Now, if a letter is met with a given
category code, that's the way xesearch will remember
it, and this will influence how prefixes and suffixes
are recognized. More precisely: the identification
of a letter (e.g. the first occurrence of it in the
typesetting stream) and its definition as part of an
affix should be done under the same category code.

% Ok, I had to do this in advance because of
% the internal mechanisms of the CodeDoc class
% and the flowfram package.
%
% Z is temporarily given category code 12, a throw-away list "makeZ"
% (empty replacement text, single word Z) is created and stopped at once,
% and Z gets category code 11 back.  Presumably this makes xesearch meet
% Z for the first time with catcode 12, which is the situation the
% "Mysterious Z" example below relies on -- confirm against the example.
\catcode`\Z=12
\SearchList{makeZ}{}{Z}
\StopList{makeZ}
\catcode`\Z=11

\extitle{catcode}{The Mysterious Z}
\begin{sidex}
\catcode`\Z=12
Here's a Z.
\catcode`\Z=11

\SearchList{fz}{\italics{#1}}{Frank Zap?}
Look, here comes Frank Zappa!

\StopList{fz}
\catcode`\Z=12
\SearchList{true fz}{\italics{#1}}{Frank Zap?}
One more time for the world.
Here comes Frank Zappa!
\end{sidex}

Note that in \citex{example~\ref{catcode}} I first
had to stop the "fz" list, otherwise the prefix
"Frank Zap?" would not have been recreated. Another
solution would have been to create another prefix
like "Frank Za?" or "*Frank Zap?".

Finally, here's how replacement texts are processed.
Suppose you have:

\begin{mainex}
\SearchList{listone}{\italics{#1}}{word}
\SearchList{listtwo}{\blue{#1}}{word}
\SearchList{listthree}{\bold{#1}}{word}
\end{mainex}

\noindent then xesearch does something like this:

\begin{mainex}
\def\command@listone#1{\italics{#1}}
\def\command@listtwo#1{\blue{#1}}
\def\command@listthree#1{\bold{#1}}
\end{mainex}

\noindent and when "word" is encountered it is turned to

\begin{mainex}
\expandafter\command@listthree\expandafter{%
  \expandafter\command@listtwo\expandafter{%
    \expandafter\command@listone\expandafter{\WORD}}}
\end{mainex}

\noindent where "\WORD" contains exactly "word"; as you
can see, this is equivalent to

\begin{mainex}
\command@listthree{\command@listtwo{\command@listone{word}}}
\end{mainex}

\noindent which you won't have failed to notice is not equivalent
to

\begin{mainex}
\bold{\blue{\italics{word}}}
\end{mainex}

\noindent although in this example the difference is immaterial.
Now, if you really want three expansions with superior
precision on one word, you probably don't need xesearch:
just use a good old macro instead.

Finally, !-marked replacement texts are simply concatenated,
as in:

\begin{mainex}
\expandafter\command@listone\expandafter{\WORD}
\expandafter\command@listthree\expandafter{\WORD}
\expandafter\command@listtwo\expandafter{\WORD}
\end{mainex}

\noindent Now you can see the reason for the three "\expandafter"'s in
\citex{example~\ref{both}}.


\section{Examples}
xesearch was first designed as the basis for the
\textsf{\Xe Index} package, an automatic indexing package
for XeLaTeX. It developed into a stand-alone
project, and standing so alone that there is no other application yet.
So here are some ideas.

First, this document has the following list:

\begin{mainex}
\SearchList*{logos}{\csname#1\endcsname}{?TeX,?ConTeXt,xesearch}
\end{mainex}

\noindent (with "\xesearch" properly defined beforehand) so throughout this
document I was able to type \Protect{`\texttt{xesearch can do this or that}'}
to produce `xesearch can do this or that'. That's not fascinating
but it was a test.

Being a linguist I can also directly paste examples from my
database and turn on xesearch to highlight some words. For instance,
suppose you're studying the grammaticalization of, say, \emph{going to}
in English,\footnote{If you're a linguist, I apologize for my lack
of originality.} and you have many examples. Then you just create
a command like "\startexample", or patch an existing command to activate
xesearch just for this stretch of text, among other things. For
instance:

\VerbCommand!()
\begin{mainex}
\SearchList{goingto}{\bold{#1}}{going to}
\def\startexample{%
  !ttslant(!color(!CodeColor)Here you can modify margins, for instance.)
  \StartSearching
  }
\def\stopexample{%
  \StopSearching
  !ttslant(!color(!CodeColor)Here you restore previous values.)
  }
\end{mainex}
\UndoVerbCommand

\noindent Otherwise you can locally use "\StopList" if
you're searching the rest of the document too.

What follows are some sketchy ideas. Concerning syntax highlighting,
I won't try to compete with the \textsf{listings} package.


\subsection{Spelling}

\extitle{spelling}{The Words In Red Don't Belong To The Top 40,000}
\begin{outputonly}
\IfFileExists{english.dic}
  {\input{english.dic}%
  \SearchList{spelling}{%
    \lowercase{\ifcsname##1@wordlist\endcsname}%
      ##1%
    \else
      \red{##1}%
    \fi}
    {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,
    n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}
  \SearchOrder{p;}  
  }%
  {%
  \emph{Unrecognized words should be in red, but you
  should create \texttt{english.dic} beforehand}%
  }

Stately, plump Buck Mulligan came from the stairhead, bearing a bowl of lather on which a mirror and a razor lay crossed. A yellow dressinggown, ungirdled, was sustained gently behind him on the mild morning air. He held the bowl aloft and intoned:

--- \textit{Introibo ad altare Dei}.

Halted, he peered down the dark winding stairs and called out coarsely:

--- Come up, Kinch! Come up, you fearful jesuit! 

Solemnly he came forward and mounted the round gunrest. He faced about and blessed gravely thrice the tower, the surrounding land and the awaking mountains. Then, catching sight of Stephen Dedalus, he bent towards him and made rapid crosses in the air, gurgling in his throat and shaking his head. Stephen Dedalus, displeased and sleepy, leaned his arms on the top of the staircase and looked coldly at the shaking gurgling face that blessed him, equine in its length, and at the light untonsured hair, grained and hued like pale oak. 

Buck Mulligan peeped an instant under the mirror and then covered the bowl smartly.

--- Back to barracks! he said sternly.

He added in a preacher's tone:

--- For this, O dearly beloved, is the genuine Christine: body and soul and blood and ouns. Slow music, please. Shut your eyes, gents. One moment. A little trouble about those white corpuscles. Silence, all.
\end{outputonly}


Here's a recipe to create an English spellchecker. Take
the list of the 40,000 most frequent words of English
by Wiktionary: \url{http://en.wiktionary.org/wiki/Wiktionary:Frequency_lists#English}.
Use TeX to turn it into a file, say "english.dic",
whose only content is "\csname"\texttt{\ttslant{<word>}}"@dic\endcsname"
for each word of the list, with \ttslant{<word>} in lowercase. What! you exclaim,
that creates 40,000 control sequences! True. But TeX
distributions can easily do that today. Input "english.dic"
at the beginning of your document. Then set up xesearch as follows:

\begin{mainex}
\SearchList{spelling}{%
  \lowercase{\ifcsname#1@dic\endcsname}%
    #1%
  \else
    \red{#1}%
  \fi}
  {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,
  n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}
\SearchOrder{p;}
\end{mainex}

\noindent Now, for each word, xesearch checks whether it
belongs to the frequency list. If it doesn't, it puts it
in red, thus signaling a likely spelling error. It could
also issue an error message, or whatever.

Some words will never belong to that list. Then we use a
simple macro to add them beforehand:

\begin{mainex}
\def\AddWord#1{\lowercase{\csname#1@dic\endcsname}}
\end{mainex}

\noindent We could also create more specific macros like
"\AddRegularVerb" which from e.g. "change" would add "change",
"changes", "changed", "changing". TeX could also rewrite
"english.dic" on the fly so there'd be no need to respecify
those words on every document. And so on and so forth.

Using a list like the frequency list is important because
we want all forms of a word to appear; i.e. organized
word lists have "hear" and not "hears", because there
exists either an algorithm or at least the user's brain
to derive "hears" from "hear".


\subsection{Word count}
Another simple use of xesearch is counting words in a document.
We define a case-insensitive list with all letters as prefixes,
so all words will be matched (we could add numbers too), as
we did in the previous example. Supposing
we want words like \emph{don't} to be counted as one word, then
we remove the apostrophe from the word boundaries (in case it
signals a dialogue, the following space will delimit the word
anyway). And we define the search order as case-insensitive
prefixes only, because we don't need anything else. The
"\shownumber" macro is clear, I believe. In the first version
of the text on the right it is "\let" to "\relax". It's just
for fun.

The "\advance" on "\wordcount" has to be "\global" because
there might be (hidden) groups in the text, for instance
in font-changing commands.

\begin{mainex}
\newcount\wordcount
\def\shownumber{%
  \raise.6\baselineskip\hbox to0pt{\hss\tiny\red{\the\wordcount}}
  }
\SearchList!{wordcount}{\global\advance\wordcount1\shownumber{}}
  {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,
  n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}
\UndoBoundary{'}
\SearchOrder{p;}
\end{mainex}

\extitle{wordcount}{Counting Words}
\begin{outputonly}
\newcount\wordcount
\let\shownumber\relax
\SearchList!{wordcount}{\global\advance\wordcount1\shownumber{}}
{a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}
\UndoBoundary{'}
\SearchOrder{p;}

Stately, plump Buck Mulligan came from the stairhead, bearing a bowl of lather on which a mirror and a razor lay crossed. A yellow dressinggown, ungirdled, was sustained gently behind him on the mild morning air. He held the bowl aloft and intoned:

--- \textit{Introibo ad altare Dei}.

Halted, he peered down the dark winding stairs and called out coarsely:

--- Come up, Kinch! Come up, you fearful jesuit!

Solemnly he came forward and mounted the round gunrest. He faced about and blessed gravely thrice the tower, the surrounding land and the awaking mountains. Then, catching sight of Stephen Dedalus, he bent towards him and made rapid crosses in the air, gurgling in his throat and shaking his head. Stephen Dedalus, displeased and sleepy, leaned his arms on the top of the staircase and looked coldly at the shaking gurgling face that blessed him, equine in its length, and at the light untonsured hair, grained and hued like pale oak. 


{\leavevmode\par\StopList{wordcount}
\emph{There are \the\wordcount\ words.}}

\leavevmode\par

\def\shownumber{\raise.6\baselineskip\hbox to0pt{\hss\tiny\red{\the\wordcount}}}

Buck Mulligan peeped an instant under the mirror and then covered the bowl smartly.

--- Back to barracks! he said sternly.

He added in a preacher's tone:

--- For this, O dearly beloved, is the genuine Christine: body and soul and blood and ouns. Slow music, please. Shut your eyes, gents. One moment. A little trouble about those white corpuscles. Silence, all.

\leavevmode\par

\StopList{wordcount}
\emph{The total number of words is: \the\wordcount.}
\end{outputonly}


\subsection{Syntax highlighting: TeX}

At first I'd designed a colorful scheme but it was
ugly, so here's something much more sober. We simply
create an empty list in which we design a macro
to add "\string"ed primitive commands.

\begin{mainex}
\SearchList{hilitex}{\bold{#1}}{}
\def\Add#1{%
  \AddToList{hilitex}{#1}%
  }
\expandafter\Add\expandafter{\string\def}
\expandafter\Add\expandafter{\string\expandafter}
\expandafter\Add\expandafter{\string\else}
\expandafter\Add\expandafter{\string\fi}
\expandafter\Add\expandafter{\string\else}
\end{mainex}

\noindent We can't do that for prefixes (and we need
them if we want e.g. to underline all user-defined "\if"),
because they would be "\string"ed and thus of category
code 12, which \citex{example~\ref{catcode}} has shown
was a trouble. So we design a macro to add words with
a backslash added beforehand. And we use it.

\begin{mainex}
\def\gobble#1{}
\def\AddPrefix#1{%
  \AddToList*{hilitex}{\expandafter\gobble\string\\#1?}%
  }
\AddPrefix{new} \AddPrefix{if}
\end{mainex}

We need one last thing. We want "\" to be recognized as
a letter, because it should be put in bold too. But we
also want it to be recognized as a string boundary. The
only solution is to make it active and let it expand
to "\relax" (a natural string boundary) plus itself
in catcode 12 (which is not defined with "\MakeBoundary"
and is thus a letter for xesearch).

\goodbreak
\begin{mainex}
\catcode`\|=0
\catcode`\\=13
|def\{|relax|string\}
\end{mainex}

\noindent If we pack everything into a usual macro to
make verbatim text, then we obtain something along the
lines of \citex{example~\ref{hilite}}. Don't forget the
typewriter font for the real thrill!

The implementation section of this documentation displays
a subtler kind of syntax highlighting, viz. "\def" and
associates put the following command in red and index it
too, except commands I don't want to see treated as such,
like temporary commands. However, the implementation
depends on CodeDoc's macros, so I won't show it here,
although you can look at the source.

%
% TeX example...
\extitle{hilite}{\TeX\ Highlighted}

% Font setup for the TeX highlighting example (Courier New, \large);
% presumably picked up by the code environments of CodeDoc.
\def\codefontspec{\fontspec{Courier New}\large}

% \makeescape is defined inside a group in which | is the escape
% character (catcode 0) and the backslash is active (catcode 13), so that
% the definition can mention the active backslash.  The definition is
% global.  When executed, \makeescape makes the backslash active and
% globally defines it to expand to \relax followed by \string applied to
% the active backslash itself.
\bgroup
\catcode`\|=0
\catcode`\\=13
|gdef|makeescape{%
  |catcode`|\=13
  |gdef\{|relax|string\}
  }
|egroup

% \CodeSpec: run inside the TeX highlighting example.  It redefines \bold
% as a fake-bold version of the monospaced font, creates the (initially
% empty) list "hilitex" whose replacement text is \bold{<word>}, and then
% fills that list through the local helpers \Add and \AddPrefix.
\def\CodeSpec{%
  \def\bold##1{%
    {\fontspec[FakeBold=1.5,Scale=.85]{Excalibur Monospace}##1}%
    }%
  \SearchList{hilitex}{\bold{##1}}{}%
  % \Add{<word>} appends <word> to the list with the starred \AddToList.
  \def\Add##1{%
    \AddToList*{hilitex}{##1}%
    }%
  % Each call expands \string first, so \Add receives the command name as
  % plain characters and not as a control sequence.
  % NOTE(review): \else is added twice (next line but one and the last of
  % these five calls); the second call looks redundant -- confirm.
  \expandafter\Add\expandafter{\string\def}%
  \expandafter\Add\expandafter{\string\expandafter}%
  \expandafter\Add\expandafter{\string\else}%
  \expandafter\Add\expandafter{\string\fi}%
  \expandafter\Add\expandafter{\string\else}%
  \def\gobble##1{}%
  % \AddPrefix{<text>}: \string\\ yields the escape character followed by a
  % backslash; \gobble removes the first of the two, so \Add receives a
  % backslash character followed by <text>.  The triple \expandafter makes
  % both expansions happen before \Add reads its argument.
  \def\AddPrefix##1{%
    \expandafter\expandafter\expandafter\Add\expandafter\expandafter\expandafter{%
      \expandafter\gobble\string\\##1}%
    }%
  \AddPrefix{new?} \AddPrefix{if?}%
  }%

\VerbCommand!()
\begin{hilite}
!makeescape()!CodeSpec()
\def\mycommand#1{%
  \expandafter\myothercommand#1%
  \ifwhatever
    \newtoks\mytoks
    \mytoks={...}%
  \else
    \mytoks={...}%
  \fi
  }
\end{hilite}
\UndoVerbCommand


\subsection{Syntax highlighting: HTML}

Coloring HTML is rather easy. The most complicated
part concerns word boundaries. xesearch is used
to find elements and attributes. Only case-insensitive
full words need to be searched for.

\begin{mainex}
\MakeBoundary{<>/=}
\SearchList{elements}{\bold{\violet{#1}}}
           {html,meta,head,body,span,p,div,b,h1,img}
\SearchList{attributes}{\bold{#1}}{align,class,style,src}
\SearchOrder{f;}
\end{mainex}

\noindent
"<" and ">" delimit markup, so we use them to switch
xesearch on and off.

\begin{mainex}
\catcode`\<=13
\catcode`\>=13
\def<{\bgroup\catcode`\'=13\catcode`\"=13\char`\<\StartSearching{}}
\def>{\egroup\char`\>}
\end{mainex}

\noindent
Quoted text should not be searched, because values
to attributes are simply put in blue. Double quotes
and single quotes should exclude each other.

\begin{mainex}
\catcode`\"=13
\newif\ifdbbegin
\def"{%
  \unless\ifsgbegin
    \ifdbbegin \egroup \char`\"
    \else \char`\" \bgroup \dbbegintrue \color{blue}\StopSearching
    \fi
  \fi
  }
\catcode`\'=13
\newif\ifsgbegin
\def'{%
  \unless\ifdbbegin
    \ifsgbegin \egroup \char`\'
    \else \char`\' \bgroup \sgbegintrue \color{blue}\StopSearching
    \fi
  \fi
  }
\end{mainex}

\noindent
"src" and "href" take links as values, usually
underlined. So we do just that.

\begin{mainex}
\SearchList!{links}{\makelink}{src,href}
\def\makelink=#1{%
  \ifx#1"
    \expandafter\makedbqlink
  \else
    \expandafter\makesgqlink
  \fi
  }
\def\makedbqlink#1"{\StopSearching="\underline{#1}"\StartSearching}
\def\makesgqlink#1'{\StopSearching='\underline{#1}'\StartSearching}
\end{mainex}


\noindent
The "&...;" character denotation is often in red.

\begin{mainex}
\catcode`\&=13
\def&#1;{%
  \char`\&
  \red{#1;}%
  }
\end{mainex}

\noindent
Finally we turn off TeX's special characters (quotes
are made active by "<" and ">"), and we make some
useful adjustments.

\begin{mainex}
\catcode`\"=12
\catcode`\'=12
\catcode`\#=12	
\catcode`\_=12
\catcode`\^=12
\catcode`\%=12
\obeylines
\def\par{\leavevmode\endgraf}
\parindent0pt
\end{mainex}


\citex{Example~\ref{html}} shows the bottom of the CTAN page.


% The macros for the HTML example mention the characters & " ' < > as
% active characters, so they are defined inside a group where those
% characters have catcode 13; \gdef makes the definitions survive the group.
\bgroup
\catcode`\&=13
\catcode`\"=13
\catcode`\'=13
\catcode`\<=13
\catcode`\>=13
% \makehtmlchar: makes & active and defines the active characters used by
% the HTML example:
%   &<name>;  prints the ampersand, then <name> and the semicolon in red;
%   " and '   open or close a quoted value: the opening quote is printed,
%             a group is opened, the text is coloured blue and searching
%             is stopped; the closing quote ends the group.  Each kind of
%             quote does nothing while the other kind is open;
%   <         opens a group in which both quotes are active, prints the
%             character and starts searching;
%   >         closes that group and prints the character.
% The two \newif are executed each time \makehtmlchar is called.
\gdef\makehtmlchar{%
  \catcode`\&=13
  \def&##1;{%
    \char`\&
    \bgroup\color{red}##1;\egroup
    }%
  \newif\ifdbbegin
  \def"{%
    \unless\ifsgbegin
      \ifdbbegin
        \egroup
        \char`\"
      \else
        \char`\"
        \bgroup
        \dbbegintrue
        \color{blue}%
        \StopSearching
      \fi
    \fi
    }%
  \newif\ifsgbegin
  \def'{%
    \unless\ifdbbegin
      \ifsgbegin
        \egroup
        \char`\'
      \else
        \char`\'
        \bgroup
        \sgbegintrue
        \color{blue}%
        \StopSearching
      \fi
    \fi
    }%
  \def<{\bgroup\catcode`\'=13\catcode`\"=13\char`\<\StartSearching{}}%
  \def>{\egroup\char`\>}%
  }

% \makehtml: complete setup for the HTML highlighting example.
%  - defines the active characters (\makehtmlchar) and makes < > / =
%    word boundaries, with < and > active;
%  - declares the lists "elements" (bold, coloured), "attributes" (bold)
%    and the !-marked list "links", whose replacement text is \makelink;
%    only case-insensitive full words are searched (\SearchOrder{f;});
%  - \makelink reads the = sign and the following quote and hands over to
%    \makedbqlink or \makesgqlink, which underline the quoted value with
%    searching switched off;
%  - finally turns the TeX special characters " ' # _ ^ % into ordinary
%    characters, obeys line ends, and stops searching (it is restarted
%    by the active <).
% Every line end inside the definition is protected, either by % or by a
% preceding control word or number: an unprotected line end after the
% active " or after a closing brace would put a space token into the
% macro, and the one after \ifx##1" would be typeset in front of every
% double-quoted link.
\gdef\makehtml{%
  \makehtmlchar
  \MakeBoundary{<>/=}%
  \catcode`\<=13
  \catcode`\>=13
  \SearchList{elements}{\textbf{\textcolor{blue!50!red}{##1}}}{p,a,div,hr,table,tr,td,body,html,span}%
  \SearchList{attributes}{\textbf{##1}}{href,width,id,align}%
  \SearchList!{links}{\makelink}{src,href}%
  \SearchOrder{f;}%
  \def\makelink=##1{%
    \ifx##1"%
      \expandafter\makedbqlink
    \else
      \expandafter\makesgqlink
    \fi
    }%
  \def\makedbqlink##1"{\StopSearching="\underline{##1}"\StartSearching}%
  \def\makesgqlink##1'{\StopSearching='\underline{##1}'\StartSearching}%
  \catcode`\"=12
  \catcode`\'=12
  \catcode`\#=12
  \catcode`\_=12
  \catcode`\^=12
  \catcode`\%=12
  \obeylines
  \def\par{\leavevmode\endgraf}%
  \parindent0pt%
  \StopSearching
}%
\egroup

% \codefontspec is switched from the Courier New setting used for the TeX
% example to the plain typewriter family.  \myskip holds the negative
% length that the HTML example below assigns to \leftskip.
\def\codefontspec{\ttfamily}
\def\myskip{-.5em}

\extitle{html}{Colorful HTML}
\VerbCommand!()
\begin{hilite}
!makehtml()!color(black)!leftskip!myskip
...
<p>
A perhaps less taxing way to express your appreciation
is to make a 

<a href="https://www.tug.org/donate.html#ctan">donation</a>&nbsp;&mdash;
small efforts add up!
</p>


<div id='footer'><hr />
<table width='100%'>
  <tr>
    <td align='left'>
      <span id='footer_author'>Site sponsor:
        <a href='http://www.tug.org'>TeX Users Group</a></span></td>
    <td>
      <span id='footer_middle'>Internet connection provided by
        <a href='http://www.smcvt.edu'>St Michael's College</a></span></td>
    <td align='right'>
      <span id='footer_home'>
      <a href='/what_is_ctan.html'>What is CTAN?</a></span></td>
  </tr>
</table>

</div>

</body>
</html>
\end{hilite}
\UndoVerbCommand


\framebreak


\section{Implementation}


% Helpers for the highlighting of the implementation listing.
%  \ifdef       flag set by \makedef (\deftrue) and tested/cleared by the
%               lists declared in \codesearch;
%  \PrintMacro  redefined here to discard its argument;
%  \csprefix, \exprefix  the letter pairs "cs" and "ex", compared by
%               \makedef with two of the tokens it reads;
%  \hash, \com  the characters # and % with category code 12, defined
%               globally from inside a group where both are made
%               "other"; \makedef compares its first argument with them.
\newif\ifdef
\def\PrintMacro#1{}

\def\csprefix{cs}
\def\exprefix{ex}
{\catcode`\#=12
\gdef\hash{#}
\catcode`\%=12
\gdef\com{%}
}

% \makedef#1#2#3: replacement text of the !-marked "def" list declared in
% \codesearch; it inspects the three tokens it is given and puts them
% back, setting \ifdef to true when they look like the start of a macro
% being defined.
%  - If #2#3 is "cs" or "ex", or if #1 is a catcode-12 # or %, nothing is
%    flagged and #1#2#3 are put back unchanged.
%  - Otherwise \deftrue is issued and only #2#3 are put back; #1 is
%    dropped (presumably the backslash, which the "deff" lists print again
%    in red -- confirm against \codesearch).
% \temp and \defnext are scratch macros; \defnext is executed last, once
% all conditionals are closed.
\def\makedef#1#2#3{%
  % Default outcome: return the three tokens untouched.
  \def\defnext{#1#2#3}%
  \def\temp{#2#3}%
  \ifx\temp\csprefix
  \else
    \ifx\temp\exprefix
    \else
      \def\temp{#1}%
      \ifx\temp\hash
      \else
        \ifx\temp\com
        \else
          % None of the exceptions applies: flag a definition and drop #1.
          \deftrue
          \def\defnext{#2#3}%
        \fi
      \fi
    \fi
  \fi
  \defnext
  }

% \codesearch: search setup used while the implementation listing is
% typeset (it is called from the redefined "code" example).
% The \ifdef flag is set by \makedef; the lists below react to it.
\def\codesearch{%
  % The logo list must not touch the code; \ # = % become word boundaries.
  \StopList{logos}%
  \MakeBoundary{\\}%
  \MakeBoundary{\#}%
  \MakeBoundary{=}%
  \MakeBoundary{\%}%
  % "deff": any word starting with xs@ or with a letter.  When \ifdef is
  % true the flag is cleared, the word is passed (as a control sequence)
  % to \DefineMacro after \IgnorePrefix{xs@}, and it is printed in red
  % with a leading backslash; otherwise the word is printed unchanged.
  \SearchList{deff}{%
  \ifdef
    \deffalse
    \IgnorePrefix{xs@}\expandafter\DefineMacro\expandafter{\csname##1\endcsname}%
    \textcolor{red}{\char`\\##1}%
  \else
    ##1%
  \fi}{xs@?,a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}%
  % "defff": same treatment for words starting with xs@@, with xs@@ as
  % the ignored prefix.
  \SearchList{defff}{%
  \ifdef
    \deffalse
    \IgnorePrefix{xs@@}\expandafter\DefineMacro\expandafter{\csname##1\endcsname}%
    \textcolor{red}{\char`\\##1}%
  \else
    ##1%
  \fi}{xs@@?}%
  % "nodeff": full words listed explicitly (temporary macros).  The flag
  % is cleared and the name is printed with its backslash, but neither
  % coloured nor passed to \DefineMacro.
  \SearchList*{nodeff}{\ifdef\deffalse\char`\\##1\else##1\fi}
    {xs@temp,xs@next,xs@String,xs@cs,xs@Phrase,xs@Sign,xs@TempWord,%
    xs@templist,xs@@String,xs@WhatNext,xs@NoReplace,xs@Delimiters,xs@Stack,%
    xs@ParseState,xs@TempNum,xs@TempDef,xs@Finalstring,xs@TempAffix,xs@@temp}%
  % "def": words ending in "def", and "let", hand over to \makedef.
  \SearchList!*{def}{\makedef}{?def,let}%
  % Order: F first (a hit inhibits S and p), then S (a hit inhibits p),
  % then p; p prefixes are sorted by length and only one of them is used.
  \SearchOrder{F!Sp;S!p;p;}%
  \SortByLength{p}%
  \SearchOnlyOne{p}%
  }

% Layout of the implementation listing.  The "code" example of CodeDoc is
% redefined (see the CodeDoc manual for the exact role of each argument):
% the last argument typesets two parallel columns, the commentary stored
% in \Comment on the left (10cm wide) and the code itself (\CodeInput) on
% the right, after \codesearch has switched the highlighting lists on.
% \Comment is emptied globally afterwards so that a commentary is printed
% only once.
\RenewExample[continuous]{code}%
	{\ttfamily\small#}{}%
	{%
	\begin{parcolumns}[colwidths={1=10cm},distance=1.2cm]{2}%
	  \parindent0pt%
	  \colchunk{\Comment}%
	  \codesearch
	  \colchunk{\vskip-2\baselineskip\strut\par\CodeInput}%
	\end{parcolumns}%
	\gdef\Comment{}%
	}
\LineNumber{code}{\rmfamily\scriptsize}{0pt}

% The section sign is made active: text enclosed between two section signs
% is stored (with a leading \noindent) in \Comment for the next code chunk.
% \ShortCode/ presumably makes / the delimiter of code chunks, as it is
% used that way throughout the implementation.  \mac{name} prints a macro
% name in typewriter type with a leading backslash.
\catcode`\§13
\long\def§#1§{\def\Comment{\noindent#1}}
\ShortCode/
\def\mac#1{\texttt{\bslash#1}}


\subsection{First things first}

§%
First we look for XeTeX.
§
% Engine and double-loading guard.
% The \csname...\endcsname construction must be built before \ifx looks at
% it, hence the leading \expandafter: without it \ifx compares the token
% \csname with the letter X, the test is always false and the error branch
% can never be reached.  With it, the first branch is taken when
% \XeTeXrevision is undefined (\csname then yields \relax): an error is
% raised and the rest of the file is skipped.  Otherwise the file is also
% skipped when \xs@ChangeCatcodes already exists, i.e. when the package
% has been read before.  The \expandafter's in front of \endinput close
% the open conditionals first.
/
\expandafter\ifx\csname XeTeXrevision\endcsname\relax
  \errmessage{You need XeTeX to run xesearch. It won't be loaded.}
  \expandafter\endinput
\else
  \expandafter\ifx\csname xs@ChangeCatcodes\endcsname\relax
  \else
    \expandafter\expandafter\expandafter\endinput
  \fi
\fi
/

§%
These will be used to keep a constant punctuation
in spite of catcode-changing packages like \texttt{babel}.
§
% \xs@ChangeCatcodes stores the current category codes of ? ! , * ; in
% five \chardef constants and then gives these characters category code
% 12; \xs@RestoreCatcodes puts the stored values back.  @ is made a letter
% first, and \xs@ChangeCatcodes is called at once, so what follows is read
% with these settings.  The existence of \xs@ChangeCatcodes is also what
% the loading guard at the top of the file tests.
/
\catcode`@=11
\def\xs@ChangeCatcodes{%
  \chardef\xs@questioncode=\catcode`\?%
  \chardef\xs@exclamationcode=\catcode`\!%
  \chardef\xs@commacode=\catcode`\,%
  \chardef\xs@starcode=\catcode`\*%
  \chardef\xs@semicoloncode=\catcode`\;%
  \catcode`\?12
  \catcode`\!12
  \catcode`\,12
  \catcode`\*12
  \catcode`\;12
  }
\def\xs@RestoreCatcodes{%
  \catcode`\?\xs@questioncode
  \catcode`\!\xs@exclamationcode
  \catcode`\,\xs@commacode
  \catcode`\*\xs@starcode
  \catcode`\;\xs@semicoloncode  
  }
\xs@ChangeCatcodes
/


§%
We declare xesearch as a package in LaTeX.
§
% Package declaration and error macro.
% When \ProvidesPackage exists (LaTeX), \xs@err{<text>} is a \PackageError
% for "xesearch" and the package is declared; the file name, date and
% version are inserted by CodeDoc through the code escape character set
% just below.  Otherwise \MessageBreak is defined as a line feed and
% \xs@err issues a plain \errmessage, inside a group where that line feed
% is the \newlinechar and no error context is shown.  The message prefix
% names the package as "xesearch", as the \PackageError variant does.
\CodeEscape!
/
\ifdefined\ProvidesPackage
  \def\xs@err#1{\PackageError{xesearch}{#1}{}}
  \ProvidesPackage{!FileName}[!FileDate!space !FileVersion!space Searching documents.]
\else
  \def\MessageBreak{^^J}
  \def\xs@err#1{%
    \bgroup
    \newlinechar`\^^J%
    \errorcontextlines=0
    \errmessage{xesearch error: #1}%
    \egroup
    }
\fi
/
\UndoCodeEscape

§%
\mac{unexpanded}
already exists in ConTeXt, and the meaning of the $\varepsilon$-TeX
primitive is taken over by \mac{normalunexpanded}, so we have to
make the proper adjustment (many thanks to Wolfgang Schuster, who
signalled this to me).

\mac{xs@contextmodule} is an empty
command let to \mac{relax} when xesearch is loaded with ConTeXt.
§
% \xs@unexpanded is the name under which the package uses the e-TeX
% primitive: it is \let to \normalunexpanded when \xs@contextmodule
% exists, and to \unexpanded otherwise.
/
\ifcsname xs@contextmodule\endcsname
  \let\xs@unexpanded\normalunexpanded
\else
  \let\xs@unexpanded\unexpanded
\fi
/

§%
Some keywords, indispensable macros,
and a bunch of \mac{new} things.
§
% Constants and registers used throughout the package:
%  \xs@end expands to itself and serves as end-of-list marker (it is
%    compared with \ifx, never expanded, in the macros further down);
%  \xs@empty ... \xs@suffixes hold fixed strings for \ifx comparisons;
%  \xs@gobble discards one argument; \xs@Lowercase{<text>}<macro> defines
%    <macro> as <text> inside \lowercase; \xs@relax is a copy of \relax;
%  then the counters, one box, the switches and two token registers.
/
\def\xs@end{\xs@end}
\def\xs@empty{}
\def\xs@star{*}
\def\xs@exclamation{!}
\def\xs@question{?}
\def\xs@starexclam{*!}
\def\xs@exclamstar{!*}
\def\xs@words{words}
\def\xs@prefixes{prefixes}
\def\xs@suffixes{suffixes}
\def\xs@gobble#1{}
\def\xs@Lowercase#1#2{\lowercase{\def#2{#1}}}
\let\xs@relax\relax
\newcount\xs@TempCount
\newcount\xs@CaseSensitive
\newcount\xs@TempLength
\newcount\xs@Length
\newbox\xs@Box
\newif\ifxs@Concatenate
\newif\ifxs@String
\newif\ifxs@Affix
\newif\ifxs@Prefix
\newif\ifxs@Suffix
\newif\ifxs@BadWord
\newif\ifxs@Star
\newif\ifxs@Phrase
\newif\ifxs@Match
\newtoks\xs@DefToks
\newtoks\xs@NoReplaceToks
/

\subsection{Character classes}

§%
Basic classes: natural delimiters (spaces and
primitives), left and right delimiters (set
by \mac{MakeBoundary}) and the normal class,
out of which letters and delimiters will be taken.
§
% Character-class numbers, all derived from \e@alloc@intercharclass@top:
%  \xs@NatDel    = top      (natural delimiters),
%  \xs@lrDel     = top - 1  (delimiters set with \MakeBoundary),
%  \xs@Classes   = top - 2  (initial value of \xs@TempCount, the class
%                            number given to a letter by \xs@CreateLetter),
%  \xs@Classless = 0        (characters xesearch has not met yet).
% A class-0 character following either kind of delimiter triggers
% \xs@LearnLetter; an \xs@lrDel character following a natural delimiter
% triggers \xs@EndString.
% NOTE(review): \e@alloc@intercharclass@top is an internal of the LaTeX
% kernel; how it is provided in other formats is not visible here.
/
\chardef\xs@NatDel=\e@alloc@intercharclass@top
\chardef\xs@lrDel=\numexpr\e@alloc@intercharclass@top-1\relax
\chardef\xs@Classes=\numexpr\e@alloc@intercharclass@top-2\relax
\chardef\xs@Classless=0
\XeTeXinterchartoks\xs@lrDel\xs@Classless={\xs@LearnLetter}
\XeTeXinterchartoks\xs@NatDel\xs@Classless={\xs@LearnLetter}
\XeTeXinterchartoks\xs@NatDel\xs@lrDel{\xs@EndString}
\xs@TempCount\xs@Classes
/


§%
This is how we make boundaries. Note that if
the character has a character class of 7 or 8,
we don't change it. The interchartoks will be
modified, however.
§
% Boundary handling.
%  \xs@MakeDel is recursive: it takes one token at a time until it meets
%    \xs@end.  A character whose class is 7 or 8 is left alone; any other
%    character gets class \xs@lrDel and is appended to \xs@Delimiters.
%    It is called once here for the default boundaries
%    { } . , ; : ! ? [ ( ) ] - ' and `.
%  \MakeBoundary{<chars>} and \UndoBoundary{<chars>} are the user commands;
%    they feed their argument, followed by \xs@end, to the recursive
%    macros.
%  \xs@UndoBoundary, for each character: if its class is \xs@lrDel, the
%    character is cut out of \xs@Delimiters with a macro delimited by that
%    very character; then its class is set to 0, whatever it was before.
%    NOTE(review): this reset is unconditional, so a character with any
%    other class is reset as well -- confirm this is intended.
/
\def\xs@Delimiters{}
\def\xs@MakeDel#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@MakeDel
    \unless\ifnum\the\XeTeXcharclass`#1=7
      \unless\ifnum\the\XeTeXcharclass`#1=8
        \XeTeXcharclass`#1=\xs@lrDel
        \expandafter\def\expandafter\xs@Delimiters\expandafter{\xs@Delimiters#1}%
      \fi
    \fi
  \fi\xs@next}
\xs@MakeDel\{\}.,;:!?[()]-'`\xs@end
\def\MakeBoundary#1{%
  \xs@MakeDel#1\xs@end
  }
\def\UndoBoundary#1{%
  \xs@UndoBoundary#1\xs@end
  }
\def\xs@UndoBoundary#1{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \ifnum\the\XeTeXcharclass`#1=\xs@lrDel
      \def\xs@RemoveFromDelimiters##1#1##2\xs@end{%
        \def\xs@Delimiters{##1##2}%
      }%
      \expandafter\xs@RemoveFromDelimiters\xs@Delimiters\xs@end
    \fi
    \XeTeXcharclass`#1=0
    \let\xs@next\xs@UndoBoundary
  \fi\xs@next
  }
/

§%
This is the macro that turns a letter into
a letter recording itself. It is recursive.
Each new letter is assigned a new character
class (from 253 downward), then it is made
to start the recording
process after delimiters, to stop it before,
and to add itself to \mac{xs@String} in both cases
or next to another letter. Before natural delimiters, however,
if the word recorded up to now is part of a
possible phrase, the process is not stopped.
The \textsf{polyglossia} patch is needed when
e.g. \texttt{?} is not turned into a \mac{xs@lrDel}
but keeps its character class as defined by
\textsf{polyglossia}.
§
/
% \xs@Letters lists every character that has been turned into a letter.
\def\xs@Letters{}%
% \xs@CreateLetter<characters>\xs@end
% Recursive. Each character receives the class currently held in
% \xs@TempCount, is remembered in <class>@xstring@letter, and gets the
% inter-character tokens that start, feed and stop the recording of
% \xs@String. \xs@TempCount is left one class lower for the next letter.
\def\xs@CreateLetter#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \expandafter\def\expandafter\xs@Letters\expandafter{\xs@Letters#1}%
    \XeTeXcharclass`#1=\xs@TempCount
    \expandafter\def\csname\the\xs@TempCount @xstring@letter\endcsname{#1}%
    % Classes 7 and 8 may belong to polyglossia: keep whatever tokens
    % are already set between class 0 and those classes, and add the
    % recording on top of them.
    \edef\xs@PolyglossiaPatch{%
      \xs@unexpanded{\XeTeXinterchartoks\xs@TempCount7}{%
        \xs@unexpanded{\xdef\xs@String{\xs@String#1}\xs@EndString}%
        \the\XeTeXinterchartoks0 7}%
      \xs@unexpanded{\XeTeXinterchartoks\xs@TempCount8}{%
        \xs@unexpanded{\xdef\xs@String{\xs@String#1}\xs@EndString}%
        \the\XeTeXinterchartoks0 8}%
      % Fixed: this used to call the undefined \xs@StartSring.
      \xs@unexpanded{\XeTeXinterchartoks8\xs@TempCount}{%
        \the\XeTeXinterchartoks8 0 \xs@unexpanded{\xs@StartString}}%
      }%
    \xs@PolyglossiaPatch
    % Letter followed by an unknown character: record, then learn it.
    \XeTeXinterchartoks\xs@TempCount\xs@Classless{%
      \xdef\xs@String{\xs@String#1}%
      \xs@LearnLetter}%
    % Delimiter followed by the letter: a word starts.
    \XeTeXinterchartoks\xs@lrDel\xs@TempCount{%
      \xs@StopTracing
      \xs@StartString
      }%
    \XeTeXinterchartoks\xs@NatDel\xs@TempCount{%
      \xs@StopTracing
      \xs@StartString
      }%
    % Letter followed by a declared delimiter: the word ends.
    \XeTeXinterchartoks\xs@TempCount\xs@lrDel{%
      \xdef\xs@String{\xs@String#1}\xs@EndString}%
    % Letter followed by a natural delimiter: the word ends unless what
    % has been recorded so far may be the beginning of a phrase, in
    % which case it is pushed on \xs@Stack and the recording goes on.
    \XeTeXinterchartoks\xs@TempCount\xs@NatDel{%
      \xdef\xs@String{\xs@String#1}%
      \ifcsname\xs@String @xs@phrases@cs\endcsname
        \XeTeXinterchartokenstate0
        \xdef\xs@Stack{%
          \xs@String\noexpand\xs@end\xs@unexpanded\expandafter{\xs@Stack}%
          }%
        \edef\xs@String{\xs@unexpanded\expandafter{\xs@String} }%
        \XeTeXinterchartokenstate1
      \else
        \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString
        \ifcsname\xs@lcString @xs@phrases@ncs\endcsname
          \XeTeXinterchartokenstate0
          \xdef\xs@Stack{%
            \xs@String\noexpand\xs@end\xs@unexpanded\expandafter{\xs@Stack}%
            }%
          \edef\xs@String{\xs@unexpanded\expandafter{\xs@String} }%
          \XeTeXinterchartokenstate1
        \else
          \expandafter\expandafter\expandafter\xs@EndString
        \fi
      \fi
      }%
    % \xs@MakeInterCharToks counts down from \xs@Classes to the class of
    % the new letter; one more step gives the next free class.
    \xs@TempCount\xs@Classes
    \xs@MakeInterCharToks#1%
    \advance\xs@TempCount-1
    \let\xs@next\xs@CreateLetter
  \fi\xs@next
  }
/

§
This is the recursive macro which creates the
\mac{XeTeXinterchartoks} for the new letter and
all existing letters.
§
/
% \xs@MakeInterCharToks<letter>
% \xs@TempCount runs downward over the classes of the existing letters.
% For each of them the tokens between that letter and the new one are
% set in both directions; the loop stops when \xs@TempCount reaches the
% class of the new letter, whose self-transition is set last.
\def\xs@MakeInterCharToks#1{%
  \unless\ifnum\xs@TempCount=\XeTeXcharclass`#1
    \expandafter\expandafter\expandafter
      \xs@Xict\csname\the\xs@TempCount @xstring@letter\endcsname
      \xs@TempCount{\XeTeXcharclass`#1}%
    \xs@Xict#1{\XeTeXcharclass`#1}\xs@TempCount
    \advance\xs@TempCount-1
    \def\xs@next{\xs@MakeInterCharToks#1}%
  \else
    \XeTeXinterchartoks\xs@TempCount\xs@TempCount{\xdef\xs@String{\xs@String#1}}%
    \let\xs@next\relax
  \fi
  \xs@next}
% \xs@Xict<letter><left class><right class>
% Between the two classes, append <letter> to \xs@String.
\def\xs@Xict#1#2#3{%
  \XeTeXinterchartoks#2#3{\xdef\xs@String{\xs@String#1}}%
  }
/


§%
xesearch learns a letter when it encounters a character
with character class 0. Since \mac{xs@CreateLetter}
is local, and since it is often executed inside the
word box (see \mac{xs@StartString}), we record the
letters thus created in \mac{xs@PendingLetters} and
create them for good after the group.
§
/
% Letters learned while a word is being collected; \xs@EndString
% creates them again once the word is over and then empties the list.
\def\xs@PendingLetters{}%
% \xs@LearnLetter<character>
% Inserted by the inter-character tokens in front of a class 0
% character: the character is made a letter, remembered globally in
% \xs@PendingLetters if a word is being collected, and put back.
\def\xs@LearnLetter#1{%
  \xs@CreateLetter#1\xs@end
  \ifxs@String
    \xdef\xs@PendingLetters{\xs@PendingLetters#1}%
  \fi
  #1}
/


\subsection{Search lists}

§%
First we define whether there's an \texttt{!}
or a \texttt{*} or both.
§
/
% User command. Catcodes are changed before the arguments are read;
% \xs@StarOrExclam then reads the optional markers and hands over
% to \xs@Search.
\def\SearchList{%
  \xs@ChangeCatcodes
  \xs@StarOrExclam\xs@Search
  }
% \xs@StarOrExclam<continuation><markers>{
% <markers> is whatever stands before the first brace. A star sets
% \xs@CaseSensitive to 2, an exclamation mark sets \ifxs@Concatenate to
% true, both markers (in any order) do both; anything else leaves the
% defaults. <continuation> is then executed.
\def\xs@StarOrExclam#1#2#{%
  \def\xs@temp{#2}%
  % Defaults, overridden below.
  \xs@CaseSensitive0
  \xs@Concatenatefalse
  \ifx\xs@temp\xs@star
    \xs@CaseSensitive2
  \else
    \ifx\xs@temp\xs@exclamation
      \xs@Concatenatetrue
    \else
      \ifx\xs@temp\xs@starexclam
        \xs@CaseSensitive2
        \xs@Concatenatetrue
      \fi
      \ifx\xs@temp\xs@exclamstar
        \xs@CaseSensitive2
        \xs@Concatenatetrue
      \fi
    \fi
  \fi
  #1%
  }
/

§%
Then, after a basic check on the name of
the list, we record it and define the macros
associated with this list as the second
argument; these macros are the normal and
!-marked (`\texttt{noreplace}') versions
(both are created because there might be an
\mac{AddToList} of a different type). Finally we launch
the word-maker on the list of words.
\mac{AddToList} is equivalent with some
adjustments.
§

/
% \xs@Search{<list name>}{<replacement text>}{<words>}
% Creates the list: an empty word register <name>@words, the normal and
% no-replace list macros (both take the word as #1), then feeds the
% comma-separated words to \xs@MakeWord. Complains if the list exists.
% The catcodes changed by \SearchList are restored on every path,
% including the error one.
\def\xs@Search#1#2#3{%
  \ifcsname#1@xs@searchlist\endcsname
    \xs@err{%
      `#1' already exists.\MessageBreak
      Use \string\AddToList{#1}{<words>} to add words to it%
      }%
  \else
    \def\xs@ListName{#1}%
    \expandafter\def\csname\xs@ListName @words\endcsname{}%
    \expandafter\def\csname #1@xs@searchlist\endcsname##1{#2}%
    \expandafter\def\csname #1@xs@searchlist@noreplace\endcsname##1{#2}%
    \expandafter\xs@MakeWord#3,\xs@end,%
  \fi
  \xs@RestoreCatcodes
  }
% User command. Same entry sequence as \SearchList, but the words are
% added to an existing list.
\def\AddToList{%
  \xs@ChangeCatcodes
  \xs@StarOrExclam\xs@AddToList
  }
% \xs@AddToList{<list name>}{<words>}
% Feeds the comma-separated words to \xs@MakeWord for an existing list,
% or complains if the list is unknown. The catcodes are restored exactly
% once, whatever the outcome.
\def\xs@AddToList#1#2{%
  \ifcsname#1@xs@searchlist\endcsname
    \def\xs@ListName{#1}%
    \expandafter\xs@MakeWord#2,\xs@end,%
  \else
    \xs@err{`#1' is not a list}%
  \fi
  \xs@RestoreCatcodes
  }
/

§%
This takes each word one by one and checks
and creates a few things.
§
/
% \xs@MakeWord<word>,
% Recursive over the comma-separated words; stops at \xs@end.
% For each new word: parse it, strip the star and question mark,
% normalise its case, record its phrase beginnings, and patch the
% replacement text of the word (or affix) with the current list.
\def\xs@MakeWord#1,{%
  \def\xs@TempWord{#1}%
  \ifx\xs@TempWord\xs@end
    \let\xs@next\relax
  \else
    % A word may appear only once per list and per case-sensitivity;
    % the marker carries a leading star for case-sensitive lists.
    \ifcsname\ifnum\xs@CaseSensitive=2*\fi#1@\xs@ListName\endcsname
      \xs@err{You have already specified `\ifnum\xs@CaseSensitive=2*\fi#1'%
        in `\xs@ListName'. \MessageBreak You can't do it twice}%
    \else
      % Create the marker under the very name tested above.
      \csname\ifnum\xs@CaseSensitive=2*\fi#1@\xs@ListName\endcsname
      \edef\xs@TempWord{#1}%
      \chardef\xs@ParseState=0
      \xs@BadWordfalse
      \xs@Starfalse
      \xs@Prefixfalse
      \xs@Suffixfalse
/

§%
For instance, we parse the word, to find prefixes
or suffixes or forbidden things, like control
sequences. Then we suppress prefixes and suffixes.
§
/
      \xs@ParseWord#1\xs@end
      \unless\ifxs@BadWord
        % A starred word is case-sensitive on its own (state 1 below).
        \ifxs@Star
          \xs@CaseSensitive1
          \expandafter\xs@SuppressPrefix\xs@TempWord\xs@end
        \fi
        \ifxs@Prefix
          \expandafter\xs@SuppressSuffix\xs@TempWord
        \else
          \ifxs@Suffix
            \expandafter\xs@SuppressPrefix\xs@TempWord\xs@end
          \fi
        \fi
/


§%
Depending on case-sensitivity, we put the word
in lowercase or not, and we define a keyword to
record the case-sensitivity.
§
/
        \def\xs@Phrase{}%
        % 0: case-insensitive list; 1: starred word in such a list
        % (the counter is reset for the next word); 2: starred list.
        \ifcase\xs@CaseSensitive
          \expandafter\xs@Lowercase\expandafter{\xs@TempWord}\xs@TempWord
          \def\xs@cs{ncs}%
          \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
        \or
          \def\xs@cs{cs}%
          \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
          \xs@CaseSensitive0
        \or
          \def\xs@cs{cs}%
          \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
        \fi
/

§%
Finally, we patch the replacement texts associated with
this word or affix.
§
/
        \ifxs@Prefix
          \xs@MakePrefix
          \def\xs@WordType{prefixes}%
          \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
        \else
          \ifxs@Suffix
            \xs@MakeSuffix
            \def\xs@WordType{suffixes}%
            \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
          \else
            \def\xs@WordType{words}%
            \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
          \fi
        \fi
      \fi
    \fi
    \let\xs@next\xs@MakeWord
  \fi\xs@next
  }
/

§%
This is a basic finite state automaton. It starts in
state 0. A star brings it in state 1. In both 0 and 1,
if it finds a letter or a \texttt{?} it goes in state 2.
From there, only letters and a \texttt{?} at the very end of
the word are allowed. Boundaries make it crash. The distinction
between state 0 and state 1 is needed just in case the
user defines the star as a boundary.
§
/
% \xs@ParseWord<characters>\xs@end
% Finite state automaton run over the word, one token at a time.
%   state 0: nothing read yet;
%   state 1: a leading star was read;
%   state 2: inside the word;
%   state 3: a trailing `?' was read, nothing may follow.
% Sets \ifxs@Star, \ifxs@Suffix (leading `?'), \ifxs@Prefix (trailing
% `?') and, through \xs@BadChar, \ifxs@BadWord. Characters still in
% class 0 are turned into letters in every state; \xs@CreateLetter
% leaves \xs@next equal to \relax, hence the \let that follows it.
\def\xs@ParseWord#1{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
    \ifxs@Suffix
      \ifnum\xs@ParseState=3
        \xs@err{You can't have a prefix and a suffix in the same word.\MessageBreak
          `\xs@unexpanded\expandafter{\xs@TempWord}' won't be searched}%
        \xs@BadWordtrue
      \fi
    \fi
  \else
    \let\xs@next\xs@ParseWord
    \expandafter\ifcat\noexpand#1\relax
      \xs@BadChar#1{control sequences are forbidden}%
    \else
      \ifcase\xs@ParseState
        \chardef\xs@TempNum=\XeTeXcharclass`#1 %
        \ifx\xs@temp\xs@star
          \xs@Startrue
          \chardef\xs@ParseState=1
          \let\xs@next\xs@ParseWord
        \else
          \ifx\xs@temp\xs@question
            \xs@Suffixtrue
            \chardef\xs@ParseState=2
            \let\xs@next\xs@ParseWord
          \else
            \ifnum\xs@TempNum>\xs@Classes
              \xs@BadChar#1{it's already a string delimiter}%
            \else
              \chardef\xs@ParseState=2
              \ifnum\xs@TempNum=0 
                \xs@CreateLetter#1\xs@end
                \let\xs@next\xs@ParseWord
              \fi
            \fi
          \fi
        \fi
%
      \or
        \chardef\xs@ParseState=2
        \chardef\xs@TempNum=\XeTeXcharclass`#1 %
        \let\xs@next\xs@ParseWord
        \ifx\xs@temp\xs@question
          \xs@Suffixtrue
        \else
          \ifnum\xs@TempNum>\xs@Classes
            \xs@BadChar#1{it's already a string delimiter}%
          \else
            \ifnum\xs@TempNum=0
              \xs@CreateLetter#1\xs@end
              \let\xs@next\xs@ParseWord
            \fi
          \fi
        \fi
%
      \or
        \let\xs@next\xs@ParseWord
        \chardef\xs@TempNum=\XeTeXcharclass`#1 %
        \ifx\xs@temp\xs@question
          \xs@Prefixtrue
          \chardef\xs@ParseState=3
        \else
          \ifnum\xs@TempNum>\xs@Classes
            \xs@BadChar#1{it's already a string delimiter}%
          \else
            % Letters inside the word are registered just like the
            % first one.
            \ifnum\xs@TempNum=0
              \xs@CreateLetter#1\xs@end
              \let\xs@next\xs@ParseWord
            \fi
          \fi
        \fi
      \or
        \xs@BadChar?{it's already a string delimiter}%
      \fi
    \fi
  \fi\xs@next
  }
/

§%
This is in case we find something we don't want in the word.
§
/
% \xs@BadChar<token>{<reason>}
% Called by \xs@ParseWord on a forbidden token: \xs@next is redefined
% so that the rest of the word up to \xs@end is discarded, the word is
% flagged as bad and an error naming the token and the reason is issued.
\def\xs@BadChar#1#2{%
  \def\xs@next##1\xs@end{}%
  \xs@BadWordtrue
  \xs@err{%
    You can't use `\noexpand#1' in `\xs@unexpanded\expandafter{\xs@TempWord}',\MessageBreak
    #2.\MessageBreak
    `\xs@unexpanded\expandafter{\xs@TempWord}' won't be searched
    }%
  }
/

§%
In case the word is a phrase, we have to know that,
so we check spaces. In case there are some, we record
\texttt{word1}, then \texttt{word1 word2}, then
\texttt{word1 word2 word3}, etc., as strings that
may lead to phrases and should be recognized as
such when xesearch is searching.
§
/
% \xs@CheckSpaces<word>\xs@end
% Adds a final space so that the worker below always finds one.
\def\xs@CheckSpaces#1\xs@end{%
  \xs@@CheckSpaces#1 \xs@end
  }
% \xs@@CheckSpaces<first word> <rest>\xs@end
% As long as something follows the first space, the first word is
% appended to the phrase built so far (\xs@MakePhrase) and the macro
% is run again on the rest. The last word is therefore never recorded:
% only proper beginnings of the phrase are.
\def\xs@@CheckSpaces#1 #2\xs@end{%
  \def\xs@temp{#2}%
  \ifx\xs@temp\xs@empty
    \let\xs@next\relax
  \else
    \expandafter\xs@MakePhrase\xs@Phrase\xs@end#1\xs@end
    \def\xs@next{\xs@@CheckSpaces#2\xs@end}%
  \fi\xs@next
  }
% \xs@MakePhrase<phrase so far>\xs@end<next word>\xs@end
% Extends \xs@Phrase with the next word (separated by a space unless
% the phrase was empty) and defines <phrase>@xs@phrases@<cs or ncs>,
% the marker tested when a word is followed by a natural delimiter.
\def\xs@MakePhrase#1\xs@end#2\xs@end{%
  \ifx\xs@Phrase\xs@empty
    \edef\xs@Phrase{#2}%
  \else
    \edef\xs@Phrase{#1 #2}%
  \fi
  \expandafter\def\csname\xs@Phrase @xs@phrases@\xs@cs\endcsname{}%
  }%
/

§%
In case the word was recognized as an affix, we add it
to the list of affixes beginning (in the case of prefixes)
or ending (in the case of suffixes) with a given letter
(this is supposed to make xesearch faster: when xesearch scans
a word, it searches e.g. prefixes if and only if there
are prefixes with the same initial letter as the word
under investigation, and it compares it to those words
only). The affix is also added to the lists sorted by
length in both orders. 
§
/
% \xs@GetFirstLetter<word>\xs@end
% Stores the first token of the word in \xs@FirstLetter.
\def\xs@GetFirstLetter#1#2\xs@end{%
  \def\xs@FirstLetter{#1}%
  }
% Registers \xs@TempWord as a prefix, unless it already has a
% definition for the current case-sensitivity. Prefixes are filed by
% first letter in three comma lists: in order of definition, longest
% first (`longer') and shortest first (`shorter').
\def\xs@MakePrefix{%
  \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@prefixes\endcsname\relax
    \expandafter\xs@GetFirstLetter\xs@TempWord\xs@end
    \unless\ifcsname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname
      % First prefix with that letter: the three lists are created.
      \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname{\xs@TempWord,}%
      \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs @longer\endcsname{\xs@TempWord,}%
      \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs @shorter\endcsname{\xs@TempWord,}%
    \else
      % Append to the plain list, insert at the right place in the
      % sorted ones.
      \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname{%
        \csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname\xs@TempWord,}%
      \def\xs@Sign{<}%
      \xs@Insert{\xs@TempWord}{\csname xs@prefixes@\xs@FirstLetter @\xs@cs @longer\endcsname}%
      \def\xs@Sign{>}%
      \xs@Insert{\xs@TempWord}{\csname xs@prefixes@\xs@FirstLetter @\xs@cs @shorter\endcsname}%
    \fi
  \fi
  }
% \xs@GetLastLetter<word>\xs@end
% Recursive: each token overwrites \xs@LastLetter until \xs@end is
% met, so the macro ends up holding the last token of the word.
\def\xs@GetLastLetter#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@GetLastLetter
    \def\xs@LastLetter{#1}%
  \fi\xs@next
  }
% Registers \xs@TempWord as a suffix, unless it already has a
% definition for the current case-sensitivity. Suffixes are filed by
% last letter in three comma lists: in order of definition, longest
% first (`longer') and shortest first (`shorter').
\def\xs@MakeSuffix{%
  \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@suffixes\endcsname\relax
    \expandafter\xs@GetLastLetter\xs@TempWord\xs@end
    \unless\ifcsname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname
      % First suffix with that letter: the three lists are created.
      \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname{\xs@TempWord,}%
      \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs @longer\endcsname{\xs@TempWord,}%
      \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs @shorter\endcsname{\xs@TempWord,}%
    \else
      % Append to the plain list, insert at the right place in the
      % sorted ones.
      \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname{%
        \csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname\xs@TempWord,}%
      \def\xs@Sign{<}%
      \xs@Insert{\xs@TempWord}{\csname xs@suffixes@\xs@LastLetter @\xs@cs @longer\endcsname}%
      \def\xs@Sign{>}%
      \xs@Insert{\xs@TempWord}{\csname xs@suffixes@\xs@LastLetter @\xs@cs @shorter\endcsname}%
    \fi
  \fi
  }
/

§%
These suppress the \texttt{?} at the beginning or the end
of the word.
§
/
% \xs@SuppressPrefix<word>\xs@end drops the first token of the word
% (used by \xs@MakeWord for a leading star or question mark).
\def\xs@SuppressPrefix#1#2\xs@end{\def\xs@TempWord{#2}}
% \xs@SuppressSuffix<word> keeps what precedes the first question mark
% (the argument is delimited by that question mark).
\def\xs@SuppressSuffix#1?{\def\xs@TempWord{#1}}
/

§%
Here's how we sort the list: we check each affix, and
we insert the affix to be added just before the
first affix that is shorter or longer, depending on
the order.
§
/
% \xs@CountLetter<word>\xs@end
% Recursive: \xs@Length is advanced by one for each token before
% \xs@end. The caller sets \xs@Length to 0 beforehand.
\def\xs@CountLetter#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@CountLetter
    \advance\xs@Length1
  \fi
  \xs@next
  }
% \xs@SortList<affix>,
% Recursive over a comma list terminated by \xs@end. Each affix is
% measured; \xs@TempAffix is inserted before the first one whose
% length compares (\xs@Sign) with \xs@AffixLength, after which
% \xs@EndList copies the remainder. If no such affix is found,
% \xs@TempAffix goes last. The result is built in \xs@templist.
\def\xs@SortList#1,{%
  % #1 is a whole affix, i.e. several tokens, so the end marker must be
  % detected through a macro; \ifx#1\xs@end would compare the first two
  % characters of the affix with each other.
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \edef\xs@templist{\xs@templist\xs@TempAffix,}%
    \let\xs@next\relax
  \else
    \xs@Length0
    \xs@CountLetter#1\xs@end
    \ifnum\xs@Length\xs@Sign\xs@AffixLength
      \edef\xs@templist{\xs@templist\xs@TempAffix,#1,}%
      \let\xs@next\xs@EndList
    \else
      \edef\xs@templist{\xs@templist#1,}%
      \let\xs@next\xs@SortList
    \fi
  \fi\xs@next
  }
% \xs@EndList<rest of the list>\xs@end,
% Appends what is left of the list, unchanged, to \xs@templist.
\def\xs@EndList#1\xs@end,{%
  \edef\xs@templist{\xs@templist#1}%
  }
% \xs@Insert{<affix macro>}{<csname construction of a sorted list>}
% Measures the affix, rebuilds the list with the affix inserted where
% \xs@Sign says (see \xs@SortList), and stores the result back into
% the list macro.
\def\xs@Insert#1#2{%
  \def\xs@TempAffix{#1}%
  \xs@Length0
  \expandafter\xs@CountLetter#1\xs@end
  \chardef\xs@AffixLength\xs@Length
  \def\xs@templist{}%
  % The line end is masked so that no space token is left behind once
  % the list has been consumed.
  \expandafter\expandafter\expandafter\xs@SortList#2\xs@end,%
  \expandafter\let#2\xs@templist
  }
/

§%
Finally, we make the definition of the word. First,
we record the word in the word register of the list, so we'll know which
words to modify in case of a \mac{StopList}, and to
which type it belongs (case-sensitivity, affix or
full word, \texttt{!}-marked or not). Then
we make both the normal replacement text and the
`no-repla\-cement' replacement text.
§
/     
% \xs@PatchDef<list macro>
% Adds the current list to the replacement texts of \xs@TempWord for
% the current case-sensitivity (\xs@cs) and type (\xs@WordType).
\def\xs@PatchDef#1{%
  % Record `word:::cs:::type:::!-or-nothing:::' in <list>@words, the
  % register later scanned by \xs@PatchWords.
  \expandafter\edef\csname\xs@ListName @words\endcsname{%
    \csname\xs@ListName @words\endcsname
    \xs@TempWord:::\xs@cs:::\xs@WordType:::\ifxs@Concatenate!\fi:::%
    }%
  % Fetch the existing normal definition, or start from the bare
  % \xs@FinalString.
  \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname\relax%
    \xs@DefToks{\xs@FinalString}%
  \else
    \xs@DefToks\expandafter\expandafter\expandafter{%
      \csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname}%
  \fi
  % Same for the no-replace definition, which starts empty.
  \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname\relax
    \xs@NoReplaceToks{}%
  \else
    \xs@NoReplaceToks\expandafter\expandafter\expandafter{%
      \csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname}%
  \fi
  \ifxs@Concatenate
    % !-marked list: the normal definition is kept as it is and the
    % list macro, applied to \xs@String, is appended to the no-replace
    % definition.
    \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname{\the\xs@DefToks}%
    \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname{%
      \the\xs@NoReplaceToks
      \xs@unexpanded{\expandafter#1\expandafter{\xs@String}}%
      }%
  \else
    % Normal list: the list macro is wrapped around the previous
    % normal definition.
    \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname{%
      \noexpand\expandafter\noexpand#1\noexpand\expandafter{\the\xs@DefToks}%
      }%
  \fi
  }
/


§%
Stopping a list is a delicate process:
we have to extract the definition associated
with the list from the words where it appears,
and it is nested in case it is not \texttt{!}-marked.
§
/
% User command: the catcodes are changed before the list names are
% read.
\def\StopList{%
  \xs@ChangeCatcodes
  \xs@StopList
  }
% \xs@StopList{<comma list of list names>}
% Hands each name to \xs@@StopList, then restores the catcodes.
\def\xs@StopList#1{%
  \xs@@StopList#1,\xs@end,%
  \xs@RestoreCatcodes
  }
% \xs@@StopList<list name>,
% Recursive over the names; stops at \xs@end. An existing list that is
% not stopped yet is flagged as stopped, its list macro is stored in
% \xs@ToRemove and every word of its register is patched.
\def\xs@@StopList#1,{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \ifcsname#1@xs@searchlist\endcsname
      \unless\ifcsname#1@xs@stoppedlist\endcsname
        % Executing the \csname creates the `stopped' flag.
        \csname#1@xs@stoppedlist\endcsname
        \expandafter\def\expandafter\xs@ToRemove\expandafter{%
          \csname#1@xs@searchlist\endcsname
          }%
        % The twelve colons complete the four delimited arguments of
        % \xs@PatchWords when it reaches \xs@end.
        \expandafter\expandafter\expandafter%
          \xs@PatchWords\csname #1@words\endcsname\xs@end::::::::::::%
      \fi
    \else
      \xs@err{`#1' is not a list}%
    \fi
  \let\xs@next\xs@@StopList
  \fi\xs@next
  }
/

§%
We modify the adequate replacement text: no-replace
or normal.
§
/
% \xs@PatchWords<word>:::<cs or ncs>:::<type>:::<! or nothing>:::
% Recursive over the records written by \xs@PatchDef; stops when the
% word is \xs@end. The list stored in \xs@ToRemove is removed from the
% no-replace definition of the word if the record is !-marked, and
% \xs@RemoveFromDef is run on the normal definition in all cases.
\def\xs@PatchWords#1:::#2:::#3:::#4:::{%
  \def\xs@TempWord{#1}%
  \ifx\xs@TempWord\xs@end
    \let\xs@next\relax
  \else
    \def\xs@temp{#4}%
    \ifx\xs@temp\xs@exclamation
      % Both arguments are expanded first: \xs@ToRemove to the list
      % macro, the \csname to the no-replace macro of the word.
      \expandafter\expandafter\expandafter%
        \xs@RemoveFromNoReplace\expandafter\xs@ToRemove\csname#1@#2@xs@#3@noreplace\endcsname
    \fi
    \def\xs@cs{#2}%
    \def\xs@WordType{#3}%
    \expandafter\xs@RemoveFromDef\csname#1@#2@xs@#3\endcsname
    \let\xs@next\xs@PatchWords
  \fi\xs@next
  }
/

§%
Removing from no-replace is rather easy, since it's
nothing more than:
\par\noindent
\mac{expandafter}\mac{\ttslant{<list1-macro>}}\mac{expandafter}\{\mac{xs@String}\}
\par\noindent
\mac{expandafter}\mac{\ttslant{<list2-macro>}}\mac{expandafter}\{\mac{xs@String}\}
\par\noindent
\mac{expandafter}\mac{\ttslant{<list3-macro>}}\mac{expandafter}\{\mac{xs@String}\}
\par\noindent
So we define a macro on the fly to find the definition
we want to remove. If there's nothing left, we let
this no-replace to \mac{relax}, so this word might
be removed altogether when we evaluate what we find.
§
/
% \xs@RemoveFromNoReplace<list macro><no-replace macro of the word>
% Defines \xs@Erase on the fly, delimited by the call of the list
% macro, so that what precedes (##1) and what follows (##3) that call
% can be glued back together; ##2 is the braced argument of the call.
% A definition that ends up empty is let to \relax.
\def\xs@RemoveFromNoReplace#1#2{%
  \def\xs@Erase##1\expandafter#1\expandafter##2##3\xs@end{%
    \def#2{##1##3}%
    \ifx#2\xs@empty
      \let#2\relax
    \fi
    }%
  \expandafter\xs@Erase#2\xs@end
  }
/

§%
Normal replacement texts have the following structure:
\par\noindent
\mac{expandafter}\mac{\ttslant{<list1-macro>}}\mac{expandafter}\{\par\noindent
\strut{} \strut{} \mac{expandafter}\mac{\ttslant{<list2-macro>}}\mac{expandafter}\{\par\noindent
\strut{} \strut{} \strut{} \strut{} ...\par\noindent
\strut{} \strut{} \strut{} \strut{}\strut{} \strut{} \mac{xs@FinalString}\par\noindent
\strut{} \strut{} \strut{} \strut{} ...\par\noindent
\strut{} \strut{} \}\}
\par\noindent
So we scan this recursively and rebuild it piecewise,
removing the list that was stopped. If in the end
there remains \mac{xs@FinalString} only, then
there's no replacement text anymore, and if moreover
the no-replace part is equal to \mac{relax}, then
there's nothing left for that word and it shouldn't
be tested anymore. So we let the definition associated
with this word to \mac{relax} or we remove it from
affixes.
§
/
% \xs@final is what a normal definition looks like when no list is
% attached to it; \xs@TempDef collects the list macros that survive.
\def\xs@final{\xs@FinalString}
\def\xs@TempDef{}
% \xs@RemoveFromDef<normal definition of the word>
% Takes the nested definition apart (\xs@Extract), rebuilds it in
% \xs@Def without the list stored in \xs@ToRemove, and stores it back.
% If only \xs@FinalString is left and the no-replace definition is
% \relax, a full word is let to \relax and an affix is removed from
% the affix lists.
\def\xs@RemoveFromDef#1{%
  \def\xs@TempDef{}%
  \def\xs@Def{\xs@FinalString}%
  \unless\ifx#1\xs@final
    \expandafter\xs@Extract#1%
  \fi
  \let#1\xs@Def
  \ifx#1\xs@final
    % The name of the no-replace macro is the name of #1, without its
    % escape character, followed by @noreplace.
    \expandafter\ifx\csname\expandafter\xs@gobble\string#1@noreplace\endcsname\relax
      \ifx\xs@WordType\xs@words
        \let#1\relax
      \else
        \xs@RemoveFromAffixes
      \fi
    \fi
  \fi
  }
% \xs@Extract\expandafter<list macro>\expandafter{<inner definition>}
% Peels one level of a nested definition. The list macro is put in
% front of \xs@TempDef unless it is the one to remove; the innermost
% list therefore ends up first. When the inner definition is the bare
% \xs@FinalString, \xs@Rebuild is run on what was collected; otherwise
% the inner definition is peeled in turn.
\def\xs@Extract\expandafter#1\expandafter#2{%
  \def\xs@temp{#1}%
  \unless\ifx\xs@temp\xs@ToRemove
    \edef\xs@TempDef{%
      \noexpand#1,%
      \xs@unexpanded\expandafter{\xs@TempDef}%
      }%
  \fi
  \def\xs@temp{#2}%
  \ifx\xs@temp\xs@final
    \def\xs@next{%
      \expandafter\xs@Rebuild\xs@TempDef\xs@end,%
      }%
  \else
    \def\xs@next{%
      \xs@Extract#2%
      }%
  \fi\xs@next
  }
% \xs@Rebuild<list macro>,
% Recursive over the comma list collected by \xs@Extract; stops at
% \xs@end. Each list macro is wrapped around the current \xs@Def,
% which starts as \xs@FinalString, so the nesting is restored from
% the inside out.
\def\xs@Rebuild#1,{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@Rebuild
    \edef\xs@Def{%
      \xs@unexpanded{\expandafter#1\expandafter}%
      \noexpand{%
      \xs@unexpanded\expandafter{\xs@Def}%
      \noexpand}%
      }%
  \fi\xs@next
  }%
/

§%
Removing an affix from a list is easy:
we scan each word and rebuild the list,
removing the affix we want to deactivate.
§
/
% Removes \xs@TempWord from the three affix lists (plain, `shorter',
% `longer') filed under its first letter for prefixes, or its last
% letter for suffixes, then lets its definition to \relax.
\def\xs@RemoveFromAffixes{%
  \ifx\xs@WordType\xs@prefixes
    \expandafter\xs@GetFirstLetter\xs@TempWord\xs@end
    \let\xs@Letter\xs@FirstLetter
  \else
    \expandafter\xs@GetLastLetter\xs@TempWord\xs@end
    \let\xs@Letter\xs@LastLetter
  \fi
  % Each list is rebuilt in \xs@templist by \xs@CleanList and stored
  % back.
  \def\xs@templist{}%
  \expandafter\expandafter\expandafter%
    \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs\endcsname\xs@end,%
  \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs\endcsname\xs@templist
  \def\xs@templist{}%
  \expandafter\expandafter\expandafter%
    \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs @shorter\endcsname\xs@end,%
  \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs @shorter\endcsname\xs@templist
  \def\xs@templist{}%
  \expandafter\expandafter\expandafter%
    \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs @longer\endcsname\xs@end,%
  \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs @longer\endcsname\xs@templist
  \expandafter\let\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname\relax
  }
% \xs@CleanList<affix>,
% Recursive over a comma list terminated by \xs@end: every affix but
% \xs@TempWord is copied to \xs@templist.
\def\xs@CleanList#1,{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \ifx\xs@temp\xs@TempWord
    \else
      \edef\xs@templist{\xs@templist#1,}%
    \fi
    \let\xs@next\xs@CleanList
  \fi
  \xs@next
  }
/

\subsection{Testing words}

§%
Here comes the big part: collecting words and
testing them. When a letter follows a delimiter,
we reset some values and start collecting the
letters in a box...
§
/
% \xs@Stack holds the successive beginnings of a possible phrase;
% \xs@Remainder is executed by \xs@EndString after the evaluation.
\def\xs@Stack{}
\def\xs@Remainder{}
% Starts the collection of a word: the flag is raised, the macro
% disables itself, the strings describing the word are emptied and
% the box that will hold the word is opened (\xs@EndString closes it).
\def\xs@StartString{%
  \xs@Stringtrue
  \let\xs@StartString\relax
  \def\xs@String{}%
  \def\PrefixFound{}%
  \def\SuffixFound{}%
  \def\AffixFound{}%
  \def\xs@Stack{}%
  \def\xs@Remainder{}%
  \xs@Phrasefalse
  \setbox\xs@Box=\hbox\bgroup
  }
% Copy of the working definition, since the macro above lets itself
% to \relax when it runs.
\let\xs@@StartString\xs@StartString
/

§%
\noindent ...and when a delimiter shows up
again, unless we're tracking a phrase, we
close the box, create the unknown
letters that we've found in it, evaluate the
word and finally output the result of this
evaluation.
§
/
% Ends the collection of a word, if one is under way: the box is
% closed, the letters learned inside it are created again outside the
% group, the word is evaluated and \xs@Remainder is executed last.
\def\xs@EndString{%
  \ifxs@String
    \egroup
    \xs@Stringfalse
    \expandafter\xs@CreateLetter\xs@PendingLetters\xs@end
    \gdef\xs@PendingLetters{}%
    \xs@Evaluate
    \xs@Restore
    \xs@StartTracing
    % \xs@Remainder is expanded before the closing \fi is read.
    \expandafter\xs@Remainder
  \fi
  }
/

§%
And here are the tests. The \texttt{F} test is for
case-sensitive full words and just checks whether there is 
a definition for this word in this case. If it finds anything,
it puts it around the string that already exists, i.e.
either the bare word or the word already surrounded by
replacement texts. Hence the bunch of \mac{expandafter}s.
If there's a no-replace, we also add it to the existing
ones. \mac{xs@relax} is just a placeholder to add the
inhibitions defined with \mac{SearchOrder}.
§
/
% Test for case-sensitive full words. If \xs@String has a definition,
% that definition is expanded around the current \xs@FinalString, the
% no-replace definition (if any) is appended to \xs@NoReplace, and the
% match is flagged.
\def\xs@@F@Test{%
  \expandafter\unless\expandafter\ifx\csname\xs@String @cs@xs@words\endcsname\relax
    % Seven \expandafter's: the token after the brace is expanded
    % three times before the \def is carried out.
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
    \def%
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
    \xs@FinalString%
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{%
      \csname\xs@String @cs@xs@words\endcsname}%
    \expandafter\unless\expandafter\ifx\csname\xs@String @cs@xs@words@noreplace\endcsname\relax
      \edef\xs@NoReplace{%
        \xs@unexpanded\expandafter{\xs@NoReplace}%
        \xs@unexpanded\expandafter{\csname\xs@String @cs@xs@words@noreplace\endcsname}%
        }%
    \fi
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
  \fi
  }
/

§%
The \texttt{f} test does the same thing, except it
puts the word in lowercase beforehand.
§
\begin{code*}
% Test for case-insensitive full words: same as the F test, except
% that the lowercased string \xs@lcString is looked up among the
% `ncs' definitions.
\def\xs@@f@Test{%
  \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString
  \expandafter\unless\expandafter\ifx\csname\xs@lcString @ncs@xs@words\endcsname\relax
    % The definition found is expanded around the current
    % \xs@FinalString.
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
    \def%
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
    \xs@FinalString%
    \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{%
      \csname\xs@lcString @ncs@xs@words\endcsname}%
    \expandafter\unless\expandafter\ifx\csname\xs@lcString @ncs@xs@words@noreplace\endcsname\relax
      \edef\xs@NoReplace{%
        \xs@unexpanded\expandafter{\xs@NoReplace}%
        \xs@unexpanded\expandafter{\csname\xs@lcString @ncs@xs@words@noreplace\endcsname}%
        }%
    \fi
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
  \fi
  }
\end{code*}

§%
Tests on prefixes check whether there exists a
prefix list beginning with the same letter as
the word at stake, and in this case run the
\mac{xs@CheckPrefixes} test.
§
/  
% Test for case-insensitive prefixes. If a prefix list exists for the
% first letter of the lowercased word, the word is checked against
% it; a hit is reported through \ifxs@Affix and turned into a match.
\def\xs@@p@Test{%
  \xs@Affixfalse
  \expandafter\xs@GetFirstLetter\xs@lcString\xs@end
  \ifcsname xs@prefixes@\xs@FirstLetter @ncs\endcsname
    \let\xs@@String\xs@lcString
    \def\xs@cs{ncs}%
    \let\xs@WhatNext\xs@p@WhatNext
    % \p@order is defined elsewhere; presumably it selects the plain,
    % `longer' or `shorter' list -- TODO confirm.
    \expandafter\expandafter\expandafter%
    \xs@CheckPrefixes\csname xs@prefixes@\xs@FirstLetter @ncs\p@order\endcsname\xs@end,%
  \fi
  \ifxs@Affix
    \xs@Affixfalse
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
   \fi
  }
% Test for case-sensitive prefixes: same as the p test, with the word
% as it was typed and the `cs' lists.
\def\xs@@P@Test{%
  \xs@Affixfalse
  \expandafter\xs@GetFirstLetter\xs@String\xs@end
  \ifcsname xs@prefixes@\xs@FirstLetter @cs\endcsname
    \let\xs@@String\xs@String
    \def\xs@cs{cs}%
    \let\xs@WhatNext\xs@P@WhatNext
    % \P@order is defined elsewhere; presumably it selects the plain,
    % `longer' or `shorter' list -- TODO confirm.
    \expandafter\expandafter\expandafter%
    \xs@CheckPrefixes\csname xs@prefixes@\xs@FirstLetter  @cs\P@order\endcsname\xs@end,%
  \fi
  \ifxs@Affix
    \xs@Affixfalse
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
   \fi
  }
/

§%
Prefixes are tested one by one by creating a
macro on the fly where one delimiter is the prefix.
Then we put the word at stake before it and execute
the macro, and if there's no first argument, then
the word matches the prefix. For instance, if
the word is \texttt{democracy} and the prefix
is \texttt{demo} then we test\par\noindent
\mac{xs@TestPrefix democracydemo}
\par\noindent and obviously the first argument
is empty, since \texttt{demo} is a delimiter.
§
/
% \xs@CheckPrefixes<prefix>,
% Recursive over a comma list of prefixes terminated by \xs@end.
% For each prefix a macro delimited by the prefix is defined and run
% on the word followed by the prefix: an empty first argument means
% the word begins with the prefix. On a hit the affix is recorded,
% the replacement texts are updated as in the full-word tests, and
% \xs@WhatNext decides what is done with the rest of the list.
\def\xs@CheckPrefixes#1,{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \def\xs@TestPrefix##1#1##2\xs@end{%
      \def\xs@temp{##1}%
      \ifx\xs@temp\xs@empty
        \xs@Affixtrue
        \def\PrefixFound{#1}%
        \def\AffixFound{#1}%
        \let\xs@next\xs@WhatNext
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
        \def%
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
        \xs@FinalString%
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{%
          \csname#1@\xs@cs @xs@prefixes\endcsname}%
        \expandafter\unless\expandafter\ifx\csname#1@\xs@cs @xs@prefixes@noreplace\endcsname\relax
          \edef\xs@NoReplace{%
            \xs@unexpanded\expandafter{\xs@NoReplace}%
            \xs@unexpanded\expandafter{\csname#1@\xs@cs @xs@prefixes@noreplace\endcsname}%
            }%
        \fi
      \else
        \let\xs@next\xs@CheckPrefixes
      \fi
      }%
    \expandafter\xs@TestPrefix\xs@@String#1\xs@end
  \fi\xs@next
  }
/

§%
The tests for suffixes work along the same lines
as those for prefixes.
§
/
% Test for case-sensitive suffixes. If a suffix list exists for the
% last letter of the word, the word is checked against it; a hit is
% reported through \ifxs@Affix and turned into a match.
\def\xs@@S@Test{%
  \xs@Affixfalse
  \expandafter\xs@GetLastLetter\xs@String\xs@end
  \ifcsname xs@suffixes@\xs@LastLetter @cs\endcsname
    \let\xs@@String\xs@String
    \def\xs@cs{cs}%
    \let\xs@WhatNext\xs@S@WhatNext
    % \S@order is defined elsewhere; presumably it selects the plain,
    % `longer' or `shorter' list -- TODO confirm.
    \expandafter\expandafter\expandafter%
    \xs@CheckSuffixes\csname xs@suffixes@\xs@LastLetter @cs\S@order\endcsname\xs@end,%
  \fi
  \ifxs@Affix
    \xs@Affixfalse
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
   \fi
  }
% Test for case-insensitive suffixes: same as the S test, with the
% lowercased word and the `ncs' lists.
\def\xs@@s@Test{%
  \xs@Affixfalse
  \expandafter\xs@GetLastLetter\xs@lcString\xs@end
  \ifcsname xs@suffixes@\xs@LastLetter @ncs\endcsname
    \let\xs@@String\xs@lcString
    \def\xs@cs{ncs}%
    \let\xs@WhatNext\xs@s@WhatNext
    % \s@order is defined elsewhere; presumably it selects the plain,
    % `longer' or `shorter' list -- TODO confirm.
    \expandafter\expandafter\expandafter%
    \xs@CheckSuffixes\csname xs@suffixes@\xs@LastLetter @ncs\s@order\endcsname\xs@end,%
  \fi
  \ifxs@Affix
    \xs@Affixfalse
    \xs@Matchtrue
    % \xs@PatchTest puts the inhibitions between these two markers;
    % the end of this macro must keep this exact shape.
    \xs@relax
    \xs@relax
   \fi
  }
% \xs@CheckSuffixes<suffix>,
% Recursive over a comma list of suffixes terminated by \xs@end.
% For each suffix a macro delimited by the suffix is defined and run
% on the word followed by the suffix: the word is taken to end with
% the suffix when what follows its first occurrence is exactly the
% suffix itself. On a hit the affix is recorded, the replacement
% texts are updated as in the full-word tests, and \xs@WhatNext
% decides what is done with the rest of the list.
\def\xs@CheckSuffixes#1,{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \def\xs@TestSuffix##1#1##2\xs@end{%
      \def\xs@@temp{##2}%
      \ifx\xs@temp\xs@@temp
        \xs@Affixtrue
        \def\SuffixFound{#1}%
        \def\AffixFound{#1}%
        \let\xs@next\xs@WhatNext
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
        \def%
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
        \xs@FinalString%
        \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter%
        {%
          \csname#1@\xs@cs @xs@suffixes\endcsname}%
        \expandafter\unless\expandafter\ifx\csname#1@\xs@cs @xs@suffixes@noreplace\endcsname\relax
          \edef\xs@NoReplace{%
            \xs@unexpanded\expandafter{\xs@NoReplace}%
            \xs@unexpanded\expandafter{\csname#1@\xs@cs @xs@suffixes@noreplace\endcsname}%
            }%
        \fi
      \else%
        \let\xs@next\xs@CheckSuffixes
      \fi
      }%
    \expandafter\xs@TestSuffix\xs@@String#1\xs@end
  \fi\xs@next
  }
/

\subsection{Search order}

§%
\mac{SearchOrder} actually defines \mac{xs@Evaluate}.
First it adds inhibitions to the tests, e.g. `\texttt{F!f;}'
adds \mac{let}\mac{xs@f@Test}\mac{relax} to the \texttt{F}
test in case it is positive, then it adds the tests
themselves, in the specified order, to \mac{xs@Evaluate}.
§
/
% User command: the catcodes are changed before the specification is
% read by \xs@SearchOrder.
\def\SearchOrder{%
  \xs@ChangeCatcodes
  \xs@SearchOrder
  }
% \xs@SearchOrder{<specification>}
% Builds \xs@Order from the semicolon-separated specification, defines
% \xs@Evaluate around it, resets the six tests to their patched
% versions and restores the catcodes.
\def\xs@SearchOrder#1{%
  \def\xs@Order{}%
  \xs@@SearchOrder#1\xs@end;%
  \edef\xs@Evaluate{%
    % Preamble of every evaluation: the inter-character tokens are
    % switched off and the working strings are initialised.
    \xs@unexpanded{%
      \XeTeXinterchartokenstate=0
      \def\xs@NoReplace{}%
      \let\xs@FinalString\xs@String
      \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString
      }%
    % The tests, in the requested order (\xs@Order is expanded once
    % here), followed by the decision.
    \xs@unexpanded\expandafter{%
      \xs@Order
      \ifxs@Match
        \def\xs@next{%
          \xs@FinalString
          }%
      \else
/

§%
If the stack is not empty, it means we're
dealing with a phrase; so the evaluation is
not over in case no test has succeded. We first
have to test the phrase minus the last word, then
the phrase minus the last two words, etc.
§
/
        \unless\ifx\xs@Stack\xs@empty
          \xs@Phrasetrue
          \expandafter\xs@PopStack\xs@Stack\xs@@end
          \let\xs@next\xs@Evaluate
        \else
          \ifxs@Phrase
            \def\xs@Stack{}%
            \def\xs@next{\xs@String\xs@Restore}%
          \else
/

§%
If the word was not a phrase, and no test was
successful, we simply put the box that contains it
back into the stream.
§
/
            \def\xs@next{\unhbox\xs@Box\xs@Restore}%
          \fi
        \fi
      \fi\xs@next
      }%
    }%
/

§%
We initialize the tests.
§
/
  \let\xs@f@Test\xs@@f@Test
  \let\xs@F@Test\xs@@F@Test
  \let\xs@p@Test\xs@@p@Test
  \let\xs@P@Test\xs@@P@Test
  \let\xs@s@Test\xs@@s@Test
  \let\xs@S@Test\xs@@S@Test
  \xs@RestoreCatcodes
  }
/

§%
This treats each specification in \mac{SearchOrder}
and the inhibitions, if any.
§
/
% \xs@@SearchOrder<letter><inhibitions>; handles one item of the
% specification, then calls itself on the next one until it meets the
% \xs@end sentinel.
%   #1  the test letter (first token of the item)
%   #2  the rest of the item up to `;', i.e. the inhibition list
\def\xs@@SearchOrder#1#2;{%
  % NOTE(review): \xs@temp is not read in this macro, and \xs@MakeInhibit
  % sets it again; this assignment looks unused -- confirm before removing.
  \def\xs@temp{#1#2}%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    % Collect the inhibitions of this item in \xs@Inhibit.
    \def\xs@Inhibit{}%
    \xs@MakeInhibit#2\xs@end
    % \xs@@#1@Test is expanded once so that \xs@PatchTest can parse its
    % body; the letter follows as the last argument.
    \expandafter\expandafter\expandafter\xs@PatchTest\csname xs@@#1@Test\endcsname#1%
    % Append \xs@#1@Test (the token itself, not its expansion) to \xs@Order.
    \edef\xs@Order{%
      \xs@unexpanded\expandafter{\xs@Order}%
      \xs@unexpanded\expandafter{\csname xs@#1@Test\endcsname}}%
    \let\xs@next\xs@@SearchOrder
  \fi\xs@next
  }
% \xs@MakeInhibit<tokens>\xs@end walks the inhibition list one token at a
% time and stops at \xs@end.
%   #1  the current token of the list
% Every token that is not equal to \xs@exclamation (presumably the `!'
% separator) appends
%   \expandafter\let\csname xs@#1@Test\endcsname\relax
% to \xs@Inhibit, i.e. code that turns the corresponding test into \relax.
\def\xs@MakeInhibit#1{%
  \def\xs@temp{#1}%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@MakeInhibit
    \unless\ifx\xs@temp\xs@exclamation%
      % Only the old contents of \xs@Inhibit are expanded (once); the new
      % code is stored as it stands.
      \edef\xs@Inhibit{%
        \xs@unexpanded\expandafter{\xs@Inhibit
        \expandafter\let\csname xs@#1@Test\endcsname\relax}%
        }%
    \fi
  \fi\xs@next
  }
% \xs@PatchTest<body of a test><letter> rewrites \xs@@<letter>@Test so that
% the current \xs@Inhibit sits between its two \xs@relax markers.
%   #1  the body of the test up to the first \xs@relax (kept)
%   #2  what stood between the two \xs@relax markers (dropped)
%   #3  the token after the second \xs@relax (dropped; a \fi is written
%       back, so this is presumably the closing \fi of the test)
%   #4  the test letter
\def\xs@PatchTest#1\xs@relax#2\xs@relax#3#4{%
  \expandafter\edef\csname xs@@#4@Test\endcsname{%
    \xs@unexpanded{#1}%
    % \xs@Inhibit is expanded once; the markers and \fi are kept as tokens.
    \xs@unexpanded\expandafter{\expandafter\xs@relax\xs@Inhibit\xs@relax\fi}%
    }%
  }
/

§%
The evaluation ends in any case with the restoration
of the tests, in case they were inhibited. The remainder
is the right part of a discarded phrase. For instance, if
xesearch searches for \texttt{page layout} it will investigate
\texttt{page properties} if it finds it, and the remainder
is \texttt{properties}.
§
/
% Clean-up performed at the end of an evaluation.
\def\xs@Restore{%
  \xs@Matchfalse
  % Undo any inhibition: each test gets its \xs@@<letter>@Test meaning back.
  \let\xs@F@Test\xs@@F@Test \let\xs@f@Test\xs@@f@Test
  \let\xs@P@Test\xs@@P@Test \let\xs@p@Test\xs@@p@Test
  \let\xs@S@Test\xs@@S@Test \let\xs@s@Test\xs@@s@Test
  \let\xs@StartString\xs@@StartString
  % The remainder becomes <\xs@NoReplace><old remainder>, each expanded once.
  \edef\xs@Remainder{\xs@unexpanded\expandafter{\xs@NoReplace}\xs@unexpanded\expandafter{\xs@Remainder}}%
  % The end of line after the digit terminates the number.
  \XeTeXinterchartokenstate=1
  }
/

§%
This is used to test phrases minus the last
word on each iteration. The stack itself is
built when the beginning of a phrase is found
before a natural delimiter.
§
/
% \xs@PopWord<current string>\xs@end<shorter string>\xs@end
%   #1  the string that has just been evaluated
%   #2  the entry popped from the stack; it is used as the leading
%       delimiter of \xs@@PopWord, so it has to be the beginning of #1
% \xs@String becomes #2, and what follows #2 in #1 is put in front of
% \xs@Remainder.
\def\xs@PopWord#1\xs@end#2\xs@end{%
  \def\xs@String{#2}%
  % \xs@@PopWord is redefined on each call with #2 as its delimiter; ##1
  % is then whatever #1 contains after #2.
  % NOTE(review): ##1 is not wrapped in \xs@unexpanded, so it is fully
  % expanded by the \edef -- confirm that this is intended.
  \def\xs@@PopWord#2##1\xs@end{%
    \edef\xs@Remainder{##1\xs@unexpanded\expandafter{\xs@Remainder}%
      }%
    }%
  \xs@@PopWord#1\xs@end
  }
% \xs@PopStack<first entry>\xs@end<other entries>\xs@@end
%   #1  the first entry of the stack (entries are separated by \xs@end)
%   #2  the rest of the stack, which becomes the new \xs@Stack
% The current \xs@String (expanded once) and the popped entry are then
% handed over to \xs@PopWord.
\def\xs@PopStack#1\xs@end#2\xs@@end{%
  \def\xs@Stack{#2}%
  \expandafter\xs@PopWord\xs@String\xs@end#1\xs@end
  }
/

§%
To search affixes in a given order, we simply
define the list to be used in tests to be the
one with this order.
§
/
% \SortByLength*{<affix letters>} or \SortByLength{<affix letters>}
%   #1  either the star or the list of affix letters
% The starred form sets \xs@AffixOrder to @shorter and lets
% \xs@SortByLength fetch the list; the unstarred form sets it to @longer
% and processes #1 directly as the list.
\def\SortByLength#1{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@star
    \def\xs@AffixOrder{@shorter}%
    \let\xs@next\xs@SortByLength
  \else
    \def\xs@AffixOrder{@longer}%
    \def\xs@next{\xs@@SortByLength#1\xs@end}%
  \fi
  \xs@next}%
% Fetches the list of affix letters (#1) and starts the loop over it,
% with \xs@end as the closing sentinel.
\def\xs@SortByLength#1{\xs@@SortByLength#1\xs@end}
% Loop over the affix letters, one token per iteration, until \xs@end.
%   #1  the current letter
% For each letter, the control sequence named <letter>@order is \let to
% the current \xs@AffixOrder.
\def\xs@@SortByLength#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \expandafter\let\csname #1@order\endcsname\xs@AffixOrder
    \let\xs@next\xs@@SortByLength
  \fi\xs@next
  }
% \DoNotSort{<affix letters>}: same loop as \SortByLength, but with an
% empty \xs@AffixOrder.
\def\DoNotSort{\def\xs@AffixOrder{}\xs@SortByLength}
/

§%
Searching all affixes is done by
setting the \mac{xs@WhatNext} macro to
\mac{xs@\ttslant{<affix>}@WhatNext}, depending
on the test being performed.
§
/
% \SearchAll{<affix letters>}: hands the letters (#1) to the loop
% \xs@SearchAll, with \xs@end as the closing sentinel.
\def\SearchAll#1{\xs@SearchAll#1\xs@end}
% \xs@SearchAll<letters>\xs@end walks the affix letters one token at a
% time and stops at \xs@end.
%   #1  the current letter
% p and P make \xs@p@WhatNext / \xs@P@WhatNext equal to \xs@CheckPrefixes,
% s makes \xs@s@WhatNext equal to \xs@CheckSuffixes; any other token is
% handled like S.
% Every \if line ends with a comment character: without it the end of
% line after `P' or `s' is stored as a space token in the macro, and that
% space is typeset when the branch is taken in horizontal mode.
\def\xs@SearchAll#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else\let\xs@next\xs@SearchAll
    \if#1p%
      \let\xs@p@WhatNext\xs@CheckPrefixes
    \else
      \if#1P%
        \let\xs@P@WhatNext\xs@CheckPrefixes
      \else
        \if#1s%
          \let\xs@s@WhatNext\xs@CheckSuffixes
        \else
          \let\xs@S@WhatNext\xs@CheckSuffixes
        \fi
      \fi
    \fi
  \fi\xs@next
  }
% \SearchOnlyOne{<affix letters>}: hands the letters (#1) to the loop
% \xs@SearchOne, with \xs@end as the closing sentinel.
\def\SearchOnlyOne#1{\xs@SearchOne#1\xs@end}
/

§%
Searching only one affix is simply gobbling the
remaining ones in case of a successful test.
§
/
% Loop over the affix letters, one token per iteration, until \xs@end.
%   #1  the current letter
% For each letter, \xs@<letter>@WhatNext is defined as a macro that
% swallows everything up to and including `\xs@end,' and leaves nothing.
\def\xs@SearchOne#1{%
  \ifx#1\xs@end
    \let\xs@next\relax	
  \else
    \let\xs@next\xs@SearchOne
    \expandafter\def\csname xs@#1@WhatNext\endcsname##1\xs@end,{}%
  \fi\xs@next
  }
/   

  
\subsection{Miscellanea}   

§%
For the moment, starting and stopping the search
is quite brutal.
§
/
% Searching is switched off by turning \xs@StartString into \relax ...
\def\StopSearching{\let\xs@StartString\relax}
% ... and switched on again by giving it back its saved meaning,
% \xs@@StartString.
\def\StartSearching{\let\xs@StartString\xs@@StartString}
/

§%
Patching the output is very simple too.
§
/
% \xs@OldOutput is \relax as long as the output routine is not patched.
\let\xs@OldOutput\relax
% \PatchOutput saves the current contents of \output in \xs@OldOutput and
% installs a new output routine that runs the same contents between
% \StopSearching and \StartSearching.  Calling it twice in a row is an
% error.
\def\PatchOutput{%
  \ifx\xs@OldOutput\relax
    % Inside the \edef, \the\output delivers the current contents of
    % \output; the tokens preceded by \noexpand are kept as they are and
    % only act when \xs@PatchOutput is executed.
    \edef\xs@PatchOutput{%
      \noexpand\def\noexpand\xs@OldOutput{%
        \the\output
        }%
      \noexpand\output{%
        \noexpand\StopSearching
        \the\output
        \noexpand\StartSearching
        }%
      }%
    % \expandafter expands the following \else, which closes the
    % conditional before \xs@PatchOutput is executed.
    \expandafter\xs@PatchOutput
  \else
    \xs@err{Output already patched}%
  \fi
  }
% \NormalOutput undoes \PatchOutput: \output gets the contents saved in
% \xs@OldOutput, and \xs@OldOutput becomes \relax again so that the output
% can be patched anew.  It is an error if the output was not patched.
\def\NormalOutput{%
  \ifx\xs@OldOutput\relax
    \xs@err{Output has not been patched}%
  \else
    % \xs@OldOutput is expanded once, so that \output receives the saved
    % tokens and not the macro itself.
    \expandafter\output\expandafter{%
      \xs@OldOutput
      }%
    \let\xs@OldOutput\relax
  \fi
  }
/

§%
As is patching the tracing.
§
/
% \PatchTracing gives \xs@StopTracing and \xs@StartTracing a real
% meaning (they are \relax after \NormalTracing).
\def\PatchTracing{%
  % Save the current values of \tracingcommands and \tracingmacros, then
  % set both to 0.
  % NOTE(review): \chardef only accepts non-negative values -- confirm
  % that negative tracing values need not be supported.
  \def\xs@StopTracing{%
    \chardef\xs@tracingcommands\tracingcommands
    \chardef\xs@tracingmacros\tracingmacros
    \tracingcommands0 \tracingmacros0\relax
    }%
  % Put the saved values back.
  \def\xs@StartTracing{%
    \tracingcommands\xs@tracingcommands
    \tracingmacros\xs@tracingmacros
    }%
  }
% \NormalTracing turns both tracing hooks into \relax, so that they do
% nothing.  This is the default, hence the call right after the definition.
\def\NormalTracing{\let\xs@StopTracing\relax \let\xs@StartTracing\relax}
\NormalTracing
/

§%
Finally we set everything back to normal,
set some default values and say goodbye. 
§
/
% Back to normal: \xs@RestoreCatcodes is called and @ is given catcode 12
% (other).
\xs@RestoreCatcodes \catcode`@=12
% Default search order.  Each item reads
%   <test>!<tests that are inhibited when this one succeeds>;
\SearchOrder{
  F!fPpSs;
  f!PpSs;
  P!pSs;
  p!Ss;
  S!s;
  s;
  }
% By default affixes are not sorted (empty \xs@AffixOrder) and all of
% them are searched.
\DoNotSort{pPsS}
\SearchAll{pPsS}
% Inter-character token insertion is switched on.
\XeTeXinterchartokenstate1
\endinput
/

\subsection{A third party file for ConTeXt}
\ProduceFile{t-xesearch.tex}[t-xesearch][v.0.1][\the\year.\month.\day]
\CodeEscape!
§
This file is mostly due to Wolfgang Schuster.

\mac{xs@contextmodule}
is used when the main file is loaded to set the meaning of \mac{xs@unexpanded}.
(ConTeXt commands have meaningful names, so I didn't want to rely on them as tests for ConTeXt,
because there might exist commands with the same names in other formats.)
§
\begin{code}
%D \module
%D   [     file=!FileName,
%D      version=!FileDate,
%D        title=\CONTEXT\ User Module,
%D     subtitle=XeSearch,
%D       author=Paul Isambert,
%D         date=\currentdate,
%D    copyright=Paul Isambert,
%D        email=zappathustra@free.fr,
%D      license=LaTeX Project Public License]

\writestatus{loading}{ConTeXt User Module / XeSearch}
% \csname...\endcsname gives \xs@contextmodule the meaning of \relax if it
% is still undefined, whatever the catcode of @; the main file uses this
% control sequence to detect that it is loaded from ConTeXt.
\csname xs@contextmodule\endcsname
\input xesearch.sty
\endinput
\end{code}
\UndoCodeEscape

% No searching from here on: \StopSearching turns \xs@StartString into
% \relax.
\StopSearching

% Continue in the next frame (flowfram).
\framebreak

% The index is typeset as an ordinary section, and each entry starts a
% new, unindented paragraph.
\renewenvironment{theindex}{%
  \section{Index}}{}
\def\item{\par\noindent}
\printindex

\end{document}