% Preamble, part 1: input encoding, file date, document class, fonts,
% logo handling and page geometry.
%
% If the engine defines \XeTeXinputencoding, read the rest of this file
% as cp1252; otherwise (\csname ...\endcsname yields \relax) do nothing.
\expandafter\ifx\csname XeTeXinputencoding\endcsname\relax
\else
  \XeTeXinputencoding=cp1252
\fi
% Zero-pad the month and the day for the file date given to \ProduceFile.
% NOTE: this overwrites \month and \day: from here on they are macros
% expanding to two-digit strings.
\ifnum\month<10 \edef\month{0\the\month}\else \edef\month{\the\month}\fi
\ifnum\day<10 \edef\day{0\the\day}\else \edef\day{\the\day}\fi
\documentclass[article(11pt),%
% produce,% Uncomment this line to produce xesearch.sty
index]{codedoc}
% CodeDoc declaration
\ProduceFile{xesearch.sty}[xesearch][v.0.2][\the\year/\month/\day]
% Fonts
\usepackage{xltxtra}
\setmainfont[Numbers=OldStyle,Ligatures=Common,Mapping=tex-text]{Palatino Linotype}
\makeatletter
% Old style numbers won't climb into
% footnote status.
% (The footnote mark is set in the main font selected again without
% Numbers=OldStyle.)
\def\@makefnmark{%
  \bgroup
  \fontspec[Ligatures=Common,Mapping=tex-text]{Palatino Linotype}%
  \textsuperscript{\@thefnmark}%
  \egroup
  }
\makeatother
\setsansfont[Scale=.85]{Verdana}
\newfontfamily\altmono[Scale=.85]{Excalibur Monospace}
% \ttslant{<text>}: <text> in the monospace font with a fake slant.
\def\ttslant#1{%
  {\fontspec[FakeSlant=.2,Scale=.85]{Excalibur Monospace}#1}%
  }
% \texttt is overridden so that it uses \altmono.
\def\texttt#1{{\altmono#1}}
% Here it is.
\usepackage{xesearch}
% Case-sensitive list (star): every word ending in TeX or ConTeXt, and the
% word xesearch, is replaced by the control sequence bearing the same name
% (\csname#1\endcsname), so logos are typeset without explicit markup.
\SearchList*{logos}{\csname#1\endcsname}{?TeX,?ConTeXt,xesearch}
\def\ConTeXt{Con\TeX t}
% The \Xe must be redefined
% because it's ugly in Verdana.
\def\xesearch{%
  \textsf{%
    X%
    \kern-.12em
    \lower.2em\hbox{\reflectbox{E}}%
    \kern-.03em
    Search%
  }%
}
% \Protect{<text>}: typeset <text> with searching switched off, then
% switch it on again.
\def\Protect#1{%
  \StopSearching
  #1%
  \StartSearching{}%
  }
% Page dimensions and colors.
\usepackage[paperheight=21cm,paperwidth=31.7cm,left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\usepackage{xcolor}
\pagestyle{empty}
% For the implementation.
\usepackage{parcolumns}
% One wonderful package.
\usepackage{flowfram}
% Page ranges used by the frame declarations below. With these settings:
% \startpage=3, \endpage=18, \codepage=19, \indexpage=51.
\newcount\startpage\startpage3
\newcount\endpage \endpage\startpage \advance\endpage15
\newcount\codepage \codepage\endpage \advance\codepage1
\newcount\indexpage \indexpage\codepage \advance\indexpage32
%\def\startpage{3}
%\def\endpage{18}
%\def\indexpage{51}
%
% Flow frames: title page, main text, implementation, and five index
% columns.
\newflowframe[1]{31.7cm}{21cm}{-2cm}{-2cm}[title]
\newflowframe[2-\endpage]{15cm}{17cm}{0cm}{0cm}[main]
\newflowframe[\codepage-\indexpage]{27.7cm}{17cm}{0cm}{0cm}[maincode]
\newflowframe[>\indexpage]{5cm}{17cm}{0cm}{0cm}[index1]
\newflowframe[>\indexpage]{5cm}{17cm}{5.5cm}{0cm}[index2]
\newflowframe[>\indexpage]{5cm}{17cm}{11cm}{0cm}[index3]
\newflowframe[>\indexpage]{5cm}{17cm}{16.5cm}{0cm}[index4]
\newflowframe[>\indexpage]{5cm}{17cm}{22cm}{0cm}[index5]
\def\CodeColor{black!60!red}
%
% Dynamic frame holding the table of contents on page 1.
\newdynamicframe[1]{11.7cm}{10cm}{16cm}{0cm}[toc]
%
% Dynamic frame on the right of the text pages; the examples are appended
% to it by \AppendEx, and clear=true is set for it.
\newdynamicframe[\startpage-\endpage]{11.7cm}{17cm}{16cm}{0cm}[exblock]
\setdynamicframe*{exblock}{clear=true,backcolor={yellow!14!white}}
%
% Running header with the page number.
\newdynamicframe[>1]{5cm}{\baselineskip}{24cm}{18cm}[header]
\setdynamiccontents*{header}{\fontspec{Palatino Linotype}\itshape\Xe Search user guide\ \textbullet\ \thepage}
%
% Coloured background strips.
\vtwotone[<\codepage,>\indexpage]{\paperwidth-.1cm}{yellow!11!white}{wW}{.1cm}{red!80!black}{rR}
\vNtone[\codepage-\indexpage]{3}{12.2cm}{yellow!11!white}{wWw}{\paperwidth-12.3cm}{yellow!14!white}{cCc}{.1cm}{red!80!black}{rRr}
%
% Registers for the random letters scattered on the title page.
\newcount\lettercount
\newcount\casecount
\newcount\rancolor
\newcount\ranangle
\newdimen\ranwidth
\newdimen\ranheight
\newdimen\ranWidth
\newdimen\ranHeight
\newif\iftoc
\newbox\letterbox
% \ranletter: draw one random letter (\@alph or \@Alph of a random number
% between 1 and 26) at a random offset, with a random shade and a random
% rotation, in a box whose dimensions are set to zero.
% \setrannum and \setrandim are not defined in this file; the document
% body inputs random.tex before calling \generate.
% The letter is dropped (\toctrue) when its offsets fall beyond
% 17cm/8.5cm -- presumably the area of the toc frame; verify.
\def\ranletter{%
  \setrannum\lettercount{1}{26}%
  \setrannum\casecount{0}{1}%
  \setrandim\ranWidth{0em}{45cm}%
  \setrandim\ranwidth{0pt}{\ranWidth}%
  \setrandim\ranHeight{0em}{25cm}%
  \setrandim\ranheight{0pt}{\ranHeight}%
  \setrannum\rancolor{0}{100}%
  \setrannum\ranangle{0}{360}%
  \tocfalse
  \advance\ranwidth-1cm
  \advance\ranheight-1cm
  \ifdim\ranwidth>40cm
    \toctrue
  \else
    \ifdim\ranheight>30cm
      \toctrue
    \else
      \ifdim\ranwidth>17cm
        \ifdim\ranheight>8.5cm
          \toctrue
        \fi
      \fi
    \fi
  \fi
  \unless\iftoc
    \setbox\letterbox=\hbox{%
      \lower\ranheight\hbox{%
        \kern\ranwidth
        \color{yellow!11!white!\the\rancolor!black}%
        \rotatebox{\the\ranangle}{\csname @\ifcase\casecount a\else A\fi lph\endcsname\lettercount}%
        }
      }%
    \wd\letterbox0cm
    \ht\letterbox0cm
    \dp\letterbox0cm
    \box\letterbox
  \fi
  }
% \generate: call \ranletter repeatedly, recursing through \next until
% \generator reaches 10000 (one last letter is drawn in the \else branch).
\newcount\generator
\def\generate{%
  \ifnum\generator<10000
    \ranletter
    \advance\generator1
    \let\next\generate
  \else
    \ranletter
    \let\next\relax
  \fi\next
  }
% CodeDoc verbatim environments.
\ShortVerb"
%
% This to add to the right frame.
% (Everything up to the delimiter \EOE is appended to the exblock frame.)
\long\def\AppendEx#1\EOE{%
  \appenddynamiccontents*{exblock}{%
    \rightskip.3cm
    #1}
  }
%
% Create the title and label of
% the example.
% #1 is the label key, #2 the title stored in \ExTitle; \excount is
% advanced first, so \ref{#1} prints the example number.
\makeatletter
\long\def\extitle#1#2{%
  \advance\excount1
  \def\@currentlabel{\the\excount}%
  \label{#1}%
  \def\ExTitle{#2}%
  }
\makeatother
%
%
% Put the final product in a box.
% I need a new box for each example
% on the same page, so I simply
% create a new box for each example.
% \ExampleBox opens the \vbox (closed later by the \egroup in \MakeBox),
% first unpacks the pending sidex2 sub-boxes (at most three, according to
% \subexbox) and resets \subexbox. \excaption is expanded here so that it
% freezes the current example number and title.
\def\ExampleBox{%
  \expandafter\newbox\csname box@\the\excount\endcsname
  \edef\excaption{
    \noexpand\hfil\noexpand\bfseries\noexpand\scshape
    \noexpand\normalsize Example \the\excount: \ExTitle}%
  \global\expandafter\setbox\csname box@\the\excount\endcsname=\vbox\bgroup
  \ifcase\subexbox
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
    \expandafter\unvbox\csname subbox@2\endcsname
  \or
    \expandafter\unvbox\csname subbox@1\endcsname
    \expandafter\unvbox\csname subbox@2\endcsname
    \expandafter\unvbox\csname subbox@3\endcsname
  \fi
  \global\subexbox0
  }
%
%
% Layout of the example.
% #1 is typeset in \CodeColor, #2 below it; the logos list is stopped so
% that it does not act inside examples.
\def\inexample#1#2{%
  \StopList{logos}%
  \small
  \hsize11.5cm
  \parindent0pt
  \leftskip.4cm
  \vskip.3cm
  \textcolor{\CodeColor}{#1}%
  \leftskip.1cm
  \vskip.2cm
  #2%
  }
% \MakeBox closes the \vbox opened by \ExampleBox and appends its
% contents, preceded by \vfil, to the exblock frame.
\def\MakeBox{%
  \egroup
  \expandafter\AppendEx\expandafter\vfil\expandafter\unvbox\csname box@\the\excount\endcsname\EOE
  }
%
% The example that is typeset in
% the right frame.
% \excount numbers the examples; it is advanced by \extitle.
\newcount\excount
\NewExample{sidex}%
  {\altmono#}{#}%
  {%
  \ExampleBox
  \inexample{\CodeInput}{\CodeOutput}%
  \vskip.4cm
  \excaption
  \MakeBox
  }
%
% This one also goes in the
% right frame but under the
% same heading as the following
% sidex example.
% (Each sidex2 is stored in its own box subbox@<n>; \ExampleBox unpacks
% these boxes at the top of the next example and resets \subexbox.)
\newcount\subexbox
\NewExample{sidex2}%
  {\altmono#}{#}%
  {%
  \global\advance\subexbox1
  \expandafter\newbox\csname subbox@\the\subexbox\endcsname
  \expandafter\global\expandafter\setbox\csname subbox@\the\subexbox\endcsname=\vbox{%
    \inexample{\CodeInput}{\CodeOutput}%
    \vskip.1cm
    }%
  }
%
% Verbatim text in the main text:
\NewExample{mainex}%
  {\par \altmono#}%
  {}{%
  \StopList{logos}%
  \vskip.2cm plus .1cm minus .1cm
  \leftskip2\parindent
  \CodeInput
  \vskip.2cm plus .1cm minus .1cm
  }
%
% Syntax highlighting examples only
% show the input code, with the desired
% specifications.
% NOTE(review): \codefontspec is not defined in the visible part of this
% file -- presumably provided elsewhere (class or package); confirm
% before using this environment.
\NewExample{hilite}%
  {#}{}%
  {
  \ExampleBox
  \inexample{\codefontspec\CodeInput}{}%
  \vskip.4cm
  \excaption
  \MakeBox
  }
%
%
% Example with only \CodeOutput shown.
\NewExample{outputonly}%
  {\altmono#}{#}%
  {%
  \ExampleBox
  \inexample{}{\CodeOutput}%
  \vskip.4cm
  \excaption
  \MakeBox
  }
% How the argument to \DescribeMacro
% is typeset.
% (The skip above the entry is omitted when \macroalreadytrue has been
% issued or when there is already a positive skip; the flag is reset
% after each use.)
\newif\ifmacroalready
\def\PrintMacro#1{%
  \unless\ifmacroalready
    \unless\ifdim\lastskip>0pt
      \vskip.1cm plus .05cm minus .05cm
    \fi
  \fi
  \macroalreadyfalse
  {\fontspec[Scale=.9]{Verdana}%
  \noindent
  \textcolor{\CodeColor}{\textbf{\llap{\textbullet\ }#1}}%
  \vskip.1cm plus .05cm minus .05cm
  \noindent
  }}
\DescribeIndexFont{\color{red!80!black}\bfseries}
% Generic macros used in the examples.
% (\frame is given a new meaning here: it simply boxes its argument.)
\def\blue{\textcolor{blue}}
\def\red{\textcolor{red}}
\def\green{\textcolor{green}}
\def\violet{\textcolor{blue!50!red}}
\let\bold\textbf
\let\italics\textit
\def\frame#1{\fbox{#1}}
\def\reverse#1{\reflectbox{#1}}
% Another big one.
\usepackage[pdfborder=0 0 0,xetex,pdfauthor={Paul Isambert},pdftitle={XeSearch user guide}]{hyperref}
% Miscellanea.
\newcount\remcount \def\rem{% \advance\remcount1 \par\noindent \the\remcount.~% } \let\citex\textbf \begin{document} \makeatletter \renewcommand*\l@section[2]{% \ifnum \c@tocdepth >\z@ \addpenalty\@secpenalty \addvspace{0em}% \setlength\@tempdima{1.5em}% \begingroup \parindent \z@ \rightskip \@pnumwidth \parfillskip -\@pnumwidth \leavevmode \bfseries \advance\leftskip\@tempdima \hskip -\leftskip #1\nobreak\hfil \nobreak\hb@xt@\@pnumwidth{\hss #2}\par \endgroup \fi} \makeatother \appenddynamiccontents*{toc}{\vfill\footnotesize\tableofcontents} \StartIgnore \IfFileExists{random.tex} {\input{random.tex}% \leavevmode\generate} {% \leavevmode\par You should have a nice explosion of letters here,\par but you don't have Donald Arseneau's \texttt{random.tex}.\par Download it from CTAN and run XeLaTeX again.} \StopIgnore \par \hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\Xe Search}\kern.6em}\par\vskip2pt \hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\FileVersion}\kern.6em}\par\vskip2pt \hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{\FileDate}\kern.6em}\par\vskip2pt \hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\textbf{Paul Isambert}\kern.6em}\par\vskip2pt \hfill\colorbox{red!80!black}{\kern1em\color{white}\Large\ttfamily\textbf{zappathustra@free.fr}\kern.6em} \framebreak \setcounter{section}{-1} \section{Changes} \long\def\Change#1#2{% \leftskip2cm \leavevmode \llap{\textbf{#1}\enspace}#2\par} {% \parindent0pt \Change{\FileDate} {% Corrected for new versions of the XeTeX executable (thanks to Yuri Robbers):\par The XeTeX executable has now increased the number of character classes to 4096. This has been accommodated by also increasing xesearch's upper bound from 255 to 4095. Not changing xesearch would result in an error. For compatibility with other versions of the executable, this solution has been implemented using system primitives rather than hard coded numbers. 
} \Change{2009/11/04} {% v.0.1 Corrected for ConTeXt (thanks to Wolfgang Schuster):\par Now there's a third party file, \texttt{t-xesearch.tex}, so that xesearch can be properly loaded with \texttt{\bslash usemodule[xesearch]}.\par The clash between ConTeXt's \texttt{\bslash unexpanded} macro and XeTeX's (actually $\varepsilon$-TeX's) \texttt{\bslash unexpanded} primitive has been fixed.} \Change{2009/10/24}{Initial version} } \framebreak \strut \vfill \section{Introductory remarks} \rem This set of macros requires the XeTeX engine. \rem This set of macros is totally experimental. \rem This set of macros is written with plain XeTeX, and so it should be compatible with all formats, at least if they implement such basic macros as "\newcount" or "\newif", which is the case at least for LaTeX and ConTeXt. \rem As a consequence of the preceding remark, I've used in the examples of this documentation control sequences that don't exist in any format (as far as I know) but whose meaning is transparent enough, like "\blue" or "\italics", which typeset \blue{blue} and \italics{italics}. They are not part of xesearch. \rem This set of macros tweaks XeTeX's character class mechanism badly. This mechanism was not designed to do what it does here. Anyway, since it is used mainly for non-alphabetical writing systems, there's little chance of clashing with xesearch. I have tried to make xesearch compatible with François Charette's \textsf{polyglossia} for languages with special punctuation patterns, like French. I have not tried to patch \textsf{babel} German shorthands in \textsf{polyglossia}, simply because I was not able to make them work. \rem xesearch is local all the way down, that is, there's not a single global command. 
So it can be used in a controlled way.% \footnote{If your knowledge of TeX is confined to LaTeX, you might not be very familiar with the notion of locality to groups, since in LaTeX pretty much everything is global by default, whereas in plain TeX the contrary holds. So to make things simple, just remember that if you use xesearch inside a LaTeX environment, even one you've defined yourself with \texttt{\bslash newenvironment}, nothing will spread outside this environment. (I don't know the situation for ConTeXt, so I won't say anything.)} \rem To see what xesearch does, see \citex{example~\ref{simple}} on the right. \extitle{simple}{A Simple Example} \begin{sidex} \SearchList{color}{\csname#1\endcsname{#1}}{blue,red,green} This is blue and this is red and this is green, but apparently yellow was not defined. \end{sidex} \rem To load the package in LaTeX, say % \begin{mainex} \usepackage{xesearch} \end{mainex} % \noindent In ConTeXt: % \begin{mainex} \usemodule[xesearch] \end{mainex} % \noindent In plain XeTeX: % \begin{mainex} \input xesearch.sty \end{mainex} \clearpage \section{Let's search} \DescribeMacro{\SearchList\meta{*!}\marg{name}\marg{replacement text}\marg{list of words}}% The star and exclamation mark are optional and their relative order does not matter. Sticking to mandatory arguments for the moment, here's how this macro works: first, you give a \meta{name} to this list, for further reference. Then you specify the \meta{replacement text}, which will be substituted for all of the words in \meta{list of words} (separated by commas). In this \meta{replacement text}, the substituted word is designated by "#1", so just think about it as an argument to a control sequence. If you forget "#1", the word disappears (until we learn how to use the exclamation mark), as can be seen in \citex{example~\ref{vanish}}. 
\extitle{vanish}{Words As Arguments} \begin{sidex} \SearchList{list1}{\italics{#1}}{obviously} \SearchList{list2}{}{something} Obviously, I have forgotten something. \end{sidex} Note that there's still a space between \emph{forgotten} and the full stop. Where does it come from? Well, it is the space that was between \emph{forgotten} and \emph{something}. At the time when xesearch manipulates \emph{something}, this space has already been read and typeset, so it does not disappear. But there's something much more interesting in this example. As you might have noticed, the first line says: % \begin{mainex} \SearchList{list1}{\italics{#1}}{obviously} \end{mainex} % \noindent and in the text to be searched we find `\texttt{Obviously}', with an uppercase first letter. Nonetheless, it is found and modified according to the replacement text. We thus discover one basic principle of xesearch: \emph{it isn't case-sensitive by default}. Hence the two following lists \VerbCommand!() % \begin{mainex} \SearchList{list1}{!ttslant()}{word} \SearchList{list2}{!ttslant()}{Word} \end{mainex} % % \UndoVerbCommand \noindent will find exactly the same set of words, namely `\texttt{word}' `\texttt{Word}', `\texttt{woRd}', `\texttt{WORD}', etc. How scary. This isn't customary in good programming and in TeX in particular. Fortunately, this default setting can be easily changed: the optional star just after "\SearchList" will make the entire list case-sensitive. Besides, if a list is not case-sensitive, i.e. if it has no star, a star before a word in that list will make the search for that particular word case-sensitive.% \footnote{However, if \texttt{\string\SearchList} is suffixed with a star, all words in the list will be case-sensitive.} This is illustrated in \citex{example~\ref{case}}. 
\extitle{case}{Illustrating Case-Sensitivity} \begin{sidex2} \SearchList{Case insensitive}{\blue{#1}}{woRd} Word word woRd WORD \StopList{Case insensitive} \end{sidex2} \begin{sidex2} \SearchList*{Case sensitive}{\red{#1}}{word} Word word woRd WORD \StopList{Case sensitive} \end{sidex2} \begin{sidex} \SearchList{Mixed}{\green{#1}}{word,*Worm} Word word woRd WORD\par Worm worm woRm WORM\par \end{sidex} In this example we discover another macro, whose meaning is clear: \DescribeMacro{\StopList\marg{list of lists}} The lists, separated by commas, are turned off. Let's turn back to "\SearchList" again. It can also take an exclamation mark beside the star (the order between the two of them is not important). In this case, the word is not substituted anymore; i.e. the replacement text will follow the word (still with "#1" standing for it). These concatenating replacements are very dangerous because they are expanded \emph{after} the search has started again. You see what I mean: if the word you've found does not endure some transformation that'll make it different from itself as far as typesetting is concerned, ooops, here's the loop. "WORD" expands to "WORD\command{WORD}" to "WORD\command{WORD\command{WORD}}", etc., and there's no way out of it. So, what's the point? The point is: the reason why those replacements are placed after the no-search area has stopped is because they are meant to host argument-taking commands to act on the rest of the streams. Such commands can't be placed in normal replacement texts without an exclamation mark, because they would stumble upon precisely what starts the search again. So be careful. Either use "!"-marked searches with non-typesetting macros, for instance to index the word, or make sure that you know exactly the many interactions you might create. The exclamation mark says it all. \citex{Example~\ref{exclam}} is silly but I hope you can see the point. 
\extitle{exclam}{A Silly One} \begin{sidex2} \SearchList*!{Hamlet}% { Or Not \StopSearching#1\StartSearching}% {To Be} To Be... \end{sidex2} \begin{sidex2} \SearchList!{typo}{\red{!!!}}{tipo} There's a tipo here. \end{sidex2} \begin{sidex} \SearchList!{XeTeX}{ \green}{is} This is \XeTeX.\par \end{sidex} Note the space at the beginning of the first and third replacement texts. Concatenating replacement texts (which replace nothing but whatever) stick to their targets. Besides, in the third example, "\green" would have gobbled the subsequent space. I hope you have noticed that the "Hamlet" list contains not a word but a phrase. So you know: xesearch can find phrases. Now we can't avoid going into a little more detail concerning the way xesearch works. But before that, let's see one simple macro: \DescribeMacro{\AddToList\meta{*!}\marg{name}\marg{list of words}} This adds the words to the \meta{name} list, which of course should already exist. The presence or absence of a star and/or an exclamation mark doesn't depend at all on the original list. You can see that in \citex{example~\ref{add}}. \extitle{add}{Adding Words To An Existing List\par\hfil(Another Silly One)} \begin{sidex} \SearchList{Stupid list}{\blue{#1}}{word} Word and beep. \AddToList*{Stupid list}{Beep} Or Beep and word and beep. \end{sidex} Finally, the words in "\SearchList" and "\AddToList" should be made of characters only, but these can be the product of expansion. For instance, if you have "\def\word{a word}", then you can say "\AddToList{mylist}{\word}". If anything else shows up xesearch won't accept the word (and you'll probably have a good deal of errors beforehand). \section{What xesearch looks for and how it finds it} xesearch can see only two things: letters and non-letters. Non-letters it doesn't like because it's then forced to spit the letters it has gathered and form a word, and most times it's not allowed to take it away. 
(Un)fortunately, xesearch is quite short-sighted: it considers letters what you tell it are not non-letters (xesearch apparently has some formal education in philosophy). More seriously (and clearly), xesearch forms a word as long as there are letters. As you can see in \citex{example~\ref{macro}}, macros are expanded and if they yield letters, xesearch can recognize a word. % \extitle{macro}{Macros Can't Hide Letters}% \begin{sidex} \SearchList{Will it find me?}{\blue{#1}}{word} \def\rd{rd} Here is a wo\rd. \end{sidex}% % So when does it stop searching? There are two main cases: \remcount0 \rem It encounters a space, or any primitive control sequence. The former case is quite natural: you want spaces to delimit words (including "\skip"s and associates). But the latter is less obvious: as soon as TeX does something that is not typesetting letters, xesearch gives up. And this includes something as seemingly innocuous as a "\relax", as you can see in \citex{example~\ref{relax}}. That's the reason why, for instance, xesearch will never find \Protect{\texttt{TeX} in \texttt{\bslash TeX}}: the definition contains many operations that aren't strictly speaking putting letters in the stream. Fortunately, the bulk of a manuscript is made of letters and spaces, and one seldom inserts "\relax"es in the middle of words. \extitle{relax}{But Primitive Can} \begin{sidex} \SearchList{This time I'm prepared}{\blue{#1}}{word} \def\rd{\relax rd} Here is a wo\rd. \end{sidex} % \rem xesearch encounters a character that you've declared as a non-letter, that is a word boundary. 
This leads us to the following macro: \DescribeMacro{\MakeBoundary\marg{characters}}\macroalreadytrue \DescribeMacro{\UndoBoundary\marg{characters}} The characters should be simply put one after the other, as in for instance \begin{mainex} \MakeBoundary{,;:!} \UndoBoundary{?()\{\}} \end{mainex} \noindent The basic set of such characters is as follows% \footnote{That is: full stop, comma, semi-colon, colon, exclamation mark, question mark, dash, inverted comma, apostrophe (i.e. left and right quote), parentheses, brackets, curly braces. This is rather arbitrary, despite some basic sensible assumptions.} % \begin{mainex} .,;:!?-`'()[]{} \end{mainex} % \noindent Now, if xesearch encounters a character that you've made into a boundary, it will stop forming a word and evaluate what it has gathered. Conversely, such characters cannot appear in the list of words in "\SearchList"; they wouldn't be found anyway. This is illustrated in \citex{example~\ref{boundary}}. % \extitle{boundary}{Where Words Start And Stop} \begin{sidex2} \MakeBoundary{/} \SearchList{separated}{\ddag#1\ddag}{waka,jawaka} Waka/Jawaka \end{sidex2} \begin{sidex} \UndoBoundary{/} \SearchList{united}{\ddag#1\ddag}{waka/jawaka} Waka/Jawaka \end{sidex} There is one big difference between those two cases. Characters defined as boundaries are not only word boundaries but also phrase boundaries. If xesearch smells a possible phrase, spaces and primitive commands won't stop it, whereas boundary characters will. You can see that in \citex{example~\ref{phrase}}. This example also illustrates one fact and one sad truth. The fact is that words aren't searched for inside phrases; so the first two \emph{you}'s were not turned to italics, since they belonged to \emph{you are what you is}. The third one, on the other hand, was recognized since \emph{you are neither good nor bad} was missed because of the intervening comma. 
\extitle{phrase}{Phrases And Words} \begin{sidex} \SearchList{word}{\italics{#1}}{you} \SearchList{phrases}{\red{#1}} {you are what you is, you are neither good nor bad} You are what\kern1cm % What a kern! you is but you are neither good, nor bad. \end{sidex} The sad truth is that the "\kern" disappeared. This is one shortcoming of xesearch: primitives disappear when they're in the middle of a possible phrase, even if that phrase is not recognized in the end. By `possible phrase' I mean a string of words that form the beginning of a phrase that you want identified, e.g. the kern in \VerbCommand![] \begin{mainex} \SearchList{H(a)unting primitives}{!ttslant[]}% {xesearch feeds on kerns} xesearch feeds on\kern1cm skips \end{mainex} \UndoVerbCommand \noindent will disappear, even though no string matches in the end. Hopefully such commands are rather rare in the bulk of a document. If some are unavoidable --- and for other uses too --- there exists a pair of commands, whose function I probably don't need to explain (except that "\StartSearching" doesn't need to be issued at the beginning of your document, it is there by default): \DescribeMacro\StartSearching\macroalreadytrue \DescribeMacro\StopSearching \vskip-\lastskip \section{(A very blunt form of) regular expressions} Words are cool, and phrases too. But life doesn't always reach their level of achievement. Sometimes you don't know what you want. Something beginning with a `B', why not? or maybe something that ends in `et'? Then look at \citex{example~\ref{affixes}}. \extitle{affixes}{Prefixes And Suffixes} \begin{sidex} \SearchList{Affixes}{\red{#1}}{*B?,?et,?ET} A \italics{Black Page} in B, actually some kind of duet for Terry Bozzio and Chad Wackerman, lay on the drumset beside the PET facility. \end{sidex} There are several things to see in this example. 
First, xesearch has entered the "\italics" command and imposed its will.\footnote{Provided I'm using commands that don't cancel each other, like plain TeX's \texttt{\bslash bf} and \texttt{\bslash it}.} Next, affixes\footnote{I use the word \emph{affixes} to refer to both \emph{prefixes} (like \texttt{B?}) and \emph{suffixes} (like \texttt{?et}). From a linguistic point of view, prefixes and suffixes (and infixes, actually) are indeed affixes, but from the same point of view, what we're talking about here has nothing to do with prefixes or suffixes, just with bits of words. I hope you don't mind.} are also sensitive to case-sensitivity, so to speak, since \emph{beside} was not identified ("*B?" being case-sensitive), whereas \emph{PET} was found ("?et" not being case-sensitive). Note that a word matches an affix search if it is at least as long as the specified part of the affix. Thus, \emph{B} matches "B?". So the question mark means `from zero to any number of additional letters,' and not `at least one additional letter.' Phrases can take only suffixes, and they affect the last word only. So \VerbCommand!() \begin{mainex} \SearchList{list}{!ttslant()}{some interesting wor?} \end{mainex} \UndoVerbCommand \noindent will find \texttt{some interesting world}, \texttt{some interesting words}, but not \texttt{some interesting word thesaurus}. An affix mark anywhere else will have no effect. Marking the unspecified part of a word with "?" is the only possibility for the question mark to enter a "\SearchList", and obviously it doesn't stand for itself. So, unless of course you undo it as a string boundary, "?" can appear only at the beginning or the end of a word.\footnote{And if a star is present, it should precede the question mark.} In any other place, it will be recognized as a boundary that has no right to be there and you'll be blamed. This means that infixes don't exist in xesearch, i.e. you can't say "B?et" to search for "bullet", for instance. 
Also, you can't say "?ull?" to match "bullet". One affix at a time. Finally, don't try to use a joker, i.e. \VerbCommand!() \begin{mainex} \SearchList{list}{!ttslant()}{?} \end{mainex} \UndoVerbCommand \noindent as an attempt to match all words. This won't work.% \footnote{If you want to match all words\par \noindent\texttt{\bslash SearchList\{list\}\{\ttslant{}\}\{a?,b?,...,z?\}}\par \noindent should do. Ok, now you've read it, you might have the impression that the title of this section verges on dishonesty. You might be right.} \section{Search order(s)} Now we shall see what happens when a word is matched by several searches. There are three different cases: \remcount0 \rem A word is matched by two or more strictly identical searches, e.g.: \VerbCommand!() \begin{mainex} \SearchList{list1}{!ttslant()}{word} \SearchList{list2}{!ttslant()}{word} ... word ... \end{mainex} \noindent\rem A word is matched by two or more prefixes or two or more suffixes identical in case-sensitivity, e.g.: \begin{mainex} \SearchList{list1}{!ttslant()}{*wor?} \SearchList{list2}{!ttslant()}{*wo?} ... word ... \end{mainex} \noindent\rem A word is matched by two or more different searches, e.g.: \begin{mainex} \SearchList{list1}{!ttslant()}{*wor?} \SearchList{list2}{!ttslant()}{word} \SearchList{list3}{!ttslant()}{?ord} ... word ... \end{mainex} \UndoVerbCommand \vskip-\lastskip \subsection{Strictly identical searches} In this case, the word will execute all the replacement texts. Their interactions depend on the way they are defined: the replacement texts that are defined without an exclamation mark take as arguments the replacement texts that are defined just before them and will themselves become arguments to subsequent replacement texts. 
See \citex{example~\ref{nested}}. \extitle{nested}{Nested Replacement Texts} \begin{sidex} \SearchList{list1}{\blue{#1}}{blue word} \SearchList{list2}{\dag#1\dag}{blue word} \SearchList{list3}{\ddag#1\ddag}{blue word} This blue word wears earrings and is equivalent to \ddag\dag\blue{term}\dag\ddag. \end{sidex} If the replacement texts are defined with an exclamation mark, they are simply concatenated, and most importantly, their argument is the word itself alone, not the accumulation of previous replacement texts. See \citex{example~\ref{concatenation}}. Of course, if a word is matched by both kinds of replacement texts, the same rules apply, as in \citex{example~\ref{both}}, where you can also be entertained by some not-very-fun-but-% you-can-hopefully-see-the-point-again fiddling with !-marked macros. If you want to know what those three "\expandafter"s are doing here, see section~\ref{texnic}. \extitle{concatenation}{Concatenation\par\hfil(Yet Another Silly Example)} \begin{sidex} \SearchList!{list1}{+}{wor?} \SearchList!{list2}{\dag}{wor?} \SearchList!{list3}{\ddag}{wor?} This word is a freight train. \end{sidex} \extitle{both}{Everything Together\par\hfil(This Is Mind-Blowing)} \begin{sidex} \SearchList{list1}{\green{#1}}{*?ORD} \SearchList{list2}{\ddag#1\ddag}{*?ORD} \def\whisper#1{\italics{ (#1)}} \def\ingreen{in green} \SearchList!{list3} {\expandafter\expandafter\expandafter\whisper} {*?ORD} \SearchList!{list4}{\ingreen}{*?ORD} This WORD must be upset. \end{sidex} \subsection{Affixes with identical characteristics} When a word is found by two or more affixes of the same kind (i.e. only prefixes or only suffixes) and with the same case-sensitivity, then you decide. 
xesearch provides the following commands: \DescribeMacro{\SortByLength\meta*\marg{pPsS}}\macroalreadytrue \DescribeMacro{\DoNotSort\marg{pPsS}}\macroalreadytrue \DescribeMacro{\SearchAll\marg{pPsS}}\macroalreadytrue \DescribeMacro{\SearchOnlyOne\marg{pPsS}} "p", "P", "s" and "S" are shorthands for (respectively) `case-insensitive prefix', `case-sensitive prefix', `case-insensitive suffix' and `case-sensitive suffix'. They refer to the type of affix to modify and those commands can take one or several of them, e.g. "\SearchAll{pSP}". By default, affixes follow the same rules as full words: each replacement text will take the replacement text defined just before as argument. But you can also create an order between them: with "\SortByLength", longer affixes match words before shorter ones, and their replacement texts are thus more deeply nested; adding a star to "\SortByLength" reverses the order: shorter affixes before longer ones. "\DoNotSort" resets to default, i.e. replacement texts follow the order in which they were defined. See \citex{example~\ref{affixorder}}. \extitle{affixorder}{This Is Fascinating} \begin{sidex} \SearchList{Three letters}{\ddag#1\ddag}{*adv?} \SearchList{Two letters}{\red{#1}}{*ad?} \SearchList{Four letters}{\dag#1\dag}{*adve?} \SortByLength{P} adverb \SortByLength*{P} adverb \DoNotSort{P} adverb \end{sidex} "\SearchAll" and "\SearchOnlyOne" set what should happen when a word is matched by an affix: shall the search stop, or shall xesearch continue to investigate whether other affixes might fit too? By default, all affixes are tested, but you might want a different behavior. Thus "\SearchOnlyOne{PS}" will make case-sensitive prefixes and suffixes search only once (and thus the order defined just before becomes extremely important) while "\SearchAll{PS}" will return to default, as illustrated in \citex{example~\ref{searchall}}. \subsection{Different searches} % % The example is after the new section so it'll go on % the right page. 
% \extitle{searchall}{This Guy Sure Ain't No David Foster Wallace} \begin{sidex} \SearchList{just a list}{\blue{#1}}{bl?,*bo?} \SearchList{just another list}{\bold{#1}}{blu?,*bol?} \SearchOnlyOne{P} Blue and bold and \SortByLength{P} bold and blue. \end{sidex} Finally, we have to see what xesearch should do when several searches match a word. Once again, you decide, thanks to the following command: \DescribeMacro{\SearchOrder\marg{order and inhibitions}} You know what "p", "P", "s" and "S" mean; "f" and "F" mean `case-insensitive full word' and `case-sensitive full word.' In the macro above, \meta{order and inhibitions} is a list of one or more sequences like \texttt{f\red{!ps};} (with the semi-colon as part of the expression) in which the red part is optional and which means: if a word matches a full-word case-insensitive search, then xesearch will not test case-insensitive prefixes and suffixes on this word. Such declarations are put one after the other, and this defines the search order. For instance, the default order for xesearch is: \begin{mainex} \SearchOrder{ F!fPpSs; f!PpSs; P!pSs; p!Ss; S!s; s; } \end{mainex} \noindent and it simply means that full words should be searched for before prefixes, and prefixes before suffixes, with case-sensitive search first in each case, and that any successful search inhibits any subsequent test. You can have as many sequences as you wish. If XeTeX goes crazy and never terminates, then you've probably forgotten a semi-colon (I do it very frequently). See \citex{example~\ref{order}} for an illustration. \extitle{order}{Search Order} \begin{sidex} \SearchList{word}{\green{#1}}{*Word} \SearchList{prefix}{\frame{#1}}{wor?} \SearchList{suffix}{\reverse{#1}}{?ord} \SearchOrder{F;p;s;} This Word is well-matched. \SearchOrder{F!p;p;S;} This Word is not so well-matched anymore. \SearchOrder{f;} This Word is not matched at all. \end{sidex} Remember that e.g. "word?" 
will find `"word"' as a prefix, not as a full word, so that `"word"' will not be found if you say for instance "\SearchList{list}{"\ttslant{}"}{word?}" and "\SearchOrder{f;}". Finally, although something like "\SearchOrder{f;}" is perfectly okay to search for case-insensitive full words only, "\SearchOrder{;}" will only make XeTeX crazy; "\StopSearching" is simpler. \section{Some TeX\relax nical matters}\label{texnic} This section is not vital to the comprehension of xesearch, but it may be useful. \DescribeMacro\PrefixFound\macroalreadytrue \DescribeMacro\SuffixFound\macroalreadytrue \DescribeMacro\AffixFound When a word is found thanks to an affix search, the prefix or suffix used is stored in the relevant macros. If there are several matching affixes, the last prefix and the last suffix win in their respective categories, and between them the same rule applies for "\AffixFound". These macros are available as long as the search has not started again, i.e. they're fully available in normal replacement texts, but in \texttt{!}-marked definitions they're erased as soon as a letter is typeset, so they can be used only at the very beginning. The rest of the time they are empty. The affix itself respects the case in which it was declared if it is case-sensitive, but it is in lowercase otherwise, however it was fed to "\SearchList". See \citex{example~\ref{found}}. \extitle{found}{Finding Affixes} \begin{sidex2} \SearchList{A case-sensitive suffix}{Suf\blue\SuffixFound}{*?FiX} SufFiX. \end{sidex2} \begin{sidex} \SearchList{A case-insensitive affix}{\blue\AffixFound fix}{Pre?} PREfix. \end{sidex} \DescribeMacro\PatchOutput\macroalreadytrue \DescribeMacro\NormalOutput By default, xesearch doesn't patch the output routine so footers and headers are searched. This can be done by these two commands. "\PatchOutput" should of course be issued after any modification to the output routine. 
"\NormalOutput" restores the value of the output routine at work when "\PatchOutput" was executed. \DescribeMacro\PatchTracing\macroalreadytrue \DescribeMacro\NormalTracing If you want to have a look at your log file with some tracing on, you will find hundreds if not thousands of totally uninformative lines. That's xesearch recursively discovering new letters and testing words. With "\PatchTracing", xesearch will try to keep quiet during those painful moments, i.e. "\tracingcommands" and "\tracingmacros" will be turned to zero. It can't exactly be totally silent, so just know that all its words begin with "xs@". "\NormalTracing" lets xesearch express itself again. Now just consider \citex{example~\ref{catcode}}. When xesearch reads the input, it introduces itself to all the letters it doesn't know. Most importantly, it writes down some information about them, like their catcode. Now, if a letter is met with a given category code, that's the way xesearch will remember it, and this will influence how prefixes and suffixes are recognized. More precisely: the identification of a letter (e.g. the first occurrence of it in the typesetting stream) and its definition as part of an affix should be done under the same category code. % Ok, I had to do this in advance because of % the internal mechanisms of the CodeDoc class % and the flowfram package. % \catcode`\Z=12 \SearchList{makeZ}{}{Z} \StopList{makeZ} \catcode`\Z=11 \extitle{catcode}{The Mysterious Z} \begin{sidex} \catcode`\Z=12 Here's a Z. \catcode`\Z=11 \SearchList{fz}{\italics{#1}}{Frank Zap?} Look, here comes Frank Zappa! \StopList{fz} \catcode`\Z=12 \SearchList{true fz}{\italics{#1}}{Frank Zap?} One more time for the world. Here comes Frank Zappa! \end{sidex} Note that in \citex{example~\ref{catcode}} I first had to stop the "fz" list, otherwise the prefix "Frank Zap?" would not have been recreated. Another solution would have been to create another prefix like "Frank Za?" or "*Frank Zap?". 
Finally, here's how replacement texts are processed. Suppose you have: \begin{mainex} \SearchList{listone}{\italics{#1}}{word} \SearchList{listtwo}{\blue{#1}}{word} \SearchList{listthree}{\bold{#1}}{word} \end{mainex} \noindent then xesearch does something like this: \begin{mainex} \def\command@listone#1{\italics{#1}} \def\command@listtwo#1{\blue{#1}} \def\command@listthree#1{\bold{#1}} \end{mainex} \noindent and when "word" is encountered it is turned to \begin{mainex} \expandafter\command@listthree\expandafter{% \expandafter\command@listtwo\expandafter{% \expandafter\command@listone\expandafter{\WORD}}} \end{mainex} \noindent where "\WORD" contains exactly "word"; as you can see, this is equivalent to \begin{mainex} \command@listthree{\command@listtwo{\command@listone{word}}} \end{mainex} \noindent which you won't have failed to notice is not equivalent to \begin{mainex} \bold{\blue{\italics{word}}} \end{mainex} \noindent although in this example the difference is immaterial. Now, if you really want three expansions with superior precision on one word, you probably don't need xesearch: just use a good old macro instead. Finally, !-marked replacement texts are simply concatenated, as in: \begin{mainex} \expandafter\command@listone\expandafter{\WORD} \expandafter\command@listthree\expandafter{\WORD} \expandafter\command@listtwo\expandafter{\WORD} \end{mainex} \noindent Now you can see the reason for the three "\expandafter"'s in \citex{example~\ref{both}}. \section{Examples} xesearch was first designed as the basis for the \textsf{\Xe Index} package, an automatic indexing package for XeLaTeX. It developed into a stand-alone project, and standing so alone that there is no other application yet. So here are some ideas. 
First, this document has the following list: \begin{mainex} \SearchList*{logos}{\csname#1\endcsname}{?TeX,?ConTeXt,xesearch} \end{mainex} \noindent (with "\xesearch" properly defined beforehand) so throughout this document I was able to type \Protect{`\texttt{xesearch can do this or that}'} to produce `xesearch can do this or that'. That's not fascinating but it was a test. Being a linguist I can also directly paste examples from my database and turn on xesearch to highlight some words. For instance, suppose you're studying the grammaticalization of, say, \emph{going to} in English,\footnote{If you're a linguist, I apologize for my lack of originality.} and you have many examples. Then you just create a command like "\startexample", or patch an existing command to activate xesearch just for this stretch of text, among other things. For instance: \VerbCommand!() \begin{mainex} \SearchList{goingto}{\bold{#1}}{going to} \def\startexample{% !ttslant(!color(!CodeColor)Here you can modify margins, for instance.) \StartSearching } \def\stopexample{% \StopSearching !ttslant(!color(!CodeColor)Here you restore previous values.) } \end{mainex} \UndoVerbCommand \noindent Otherwise you can locally use "\StopList" if you're searching the rest of the document too. What follows are some sketchy ideas. Concerning syntax highlighting, I won't try to compete with the \textsf{listings} package. \subsection{Spelling} \extitle{spelling}{The Words In Red Don't Belong To The Top 40,000} \begin{outputonly} \IfFileExists{english.dic} {\input{english.dic}% \SearchList{spelling}{% \lowercase{\ifcsname##1@wordlist\endcsname}% ##1% \else \red{##1}% \fi} {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?, n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?} \SearchOrder{p;} }% {% \emph{Unrecognized words should be in red, but you should create \texttt{english.dic} beforehand}% } Stately, plump Buck Mulligan came from the stairhead, bearing a bowl of lather on which a mirror and a razor lay crossed. 
A yellow dressinggown, ungirdled, was sustained gently behind him on the mild morning air. He held the bowl aloft and intoned: --- \textit{Introibo ad altare Dei}. Halted, he peered down the dark winding stairs and called out coarsely: --- Come up, Kinch! Come up, you fearful jesuit! Solemnly he came forward and mounted the round gunrest. He faced about and blessed gravely thrice the tower, the surrounding land and the awaking mountains. Then, catching sight of Stephen Dedalus, he bent towards him and made rapid crosses in the air, gurgling in his throat and shaking his head. Stephen Dedalus, displeased and sleepy, leaned his arms on the top of the staircase and looked coldly at the shaking gurgling face that blessed him, equine in its length, and at the light untonsured hair, grained and hued like pale oak. Buck Mulligan peeped an instant under the mirror and then covered the bowl smartly. --- Back to barracks! he said sternly. He added in a preacher's tone: --- For this, O dearly beloved, is the genuine Christine: body and soul and blood and ouns. Slow music, please. Shut your eyes, gents. One moment. A little trouble about those white corpuscles. Silence, all. \end{outputonly} Here's a recipe to create an English spellchecker. Take the list of the 40,000 most frequent words of English by Wiktionary: \url{http://en.wiktionary.org/wiki/Wiktionary:Frequency_lists#English}. Use TeX to turn it into a file, say "english.dic", whose only content is "\csname"\texttt{\ttslant{}}"@dic\endcsname" for each word of the list, with \ttslant{} in lowercase. What! you exclaim, that creates 40,000 control sequences! True. But TeX distributions can easily do that today. Input "english.dic" at the beginning of your document. 
Then set up xesearch as follows: \begin{mainex} \SearchList{spelling}{% \lowercase{\ifcsname#1@dic\endcsname}% #1% \else \red{#1}% \fi} {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?, n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?} \SearchOrder{p;} \end{mainex} \noindent Now, for each word, xesearch checks whether it belongs to the frequency list. If it doesn't, it puts it in red, thus signaling a likely spelling error. It could also issue an error message, or whatever. Some words will never belong to that list. Then we use a simple macro to add them beforehand: \begin{mainex} \def\AddWord#1{\lowercase{\csname#1@dic\endcsname}} \end{mainex} \noindent We could also create more specific macros like "\AddRegularVerb" which from e.g. "change" would add "change", "changes", "changed", "changing". TeX could also rewrite "english.dic" on the fly so there'd be no need to respecify those words in every document. And so on and so forth. Using a list like the frequency list is important because we want all forms of a word to appear; i.e. organized word lists have "hear" and not "hears", because there exists either an algorithm or at least the user's brain to derive "hears" from "hear". \subsection{Word count} Another simple use of xesearch is counting words in a document. We define a case-insensitive list with all letters as prefixes, so all words will be matched (we could add numbers too), as we did in the previous example. Supposing we want words like \emph{don't} to be counted as one word, then we remove the apostrophe from the word boundaries (in case it signals a dialogue, the following space will delimit the word anyway). And we define the search order as case-insensitive prefixes only, because we don't need anything else. The "\shownumber" macro is clear, I believe. In the first version of the text on the right it is "\let" to "\relax". It's just for fun. 
The "\advance" on "\wordcount" has to be "\global" because there might be (hidden) groups in the text, for instance in font-changing commands. \begin{mainex} \newcount\wordcount \def\shownumber{% \raise.6\baselineskip\hbox to0pt{\hss\tiny\red{\the\wordcount}} } \SearchList!{wordcount}{\global\advance\wordcount1\shownumber{}} {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?, n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?} \UndoBoundary{'} \SearchOrder{p;} \end{mainex} \extitle{wordcount}{Counting Words} \begin{outputonly} \newcount\wordcount \let\shownumber\relax \SearchList!{wordcount}{\global\advance\wordcount1\shownumber{}} {a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?} \UndoBoundary{'} \SearchOrder{p;} Stately, plump Buck Mulligan came from the stairhead, bearing a bowl of lather on which a mirror and a razor lay crossed. A yellow dressinggown, ungirdled, was sustained gently behind him on the mild morning air. He held the bowl aloft and intoned: --- \textit{Introibo ad altare Dei}. Halted, he peered down the dark winding stairs and called out coarsely: --- Come up, Kinch! Come up, you fearful jesuit! Solemnly he came forward and mounted the round gunrest. He faced about and blessed gravely thrice the tower, the surrounding land and the awaking mountains. Then, catching sight of Stephen Dedalus, he bent towards him and made rapid crosses in the air, gurgling in his throat and shaking his head. Stephen Dedalus, displeased and sleepy, leaned his arms on the top of the staircase and looked coldly at the shaking gurgling face that blessed him, equine in its length, and at the light untonsured hair, grained and hued like pale oak. {\leavevmode\par\StopList{wordcount} \emph{There are \the\wordcount\ words.}} \leavevmode\par \def\shownumber{\raise.6\baselineskip\hbox to0pt{\hss\tiny\red{\the\wordcount}}} Buck Mulligan peeped an instant under the mirror and then covered the bowl smartly. --- Back to barracks! he said sternly. 
He added in a preacher's tone: --- For this, O dearly beloved, is the genuine Christine: body and soul and blood and ouns. Slow music, please. Shut your eyes, gents. One moment. A little trouble about those white corpuscles. Silence, all. \leavevmode\par \StopList{wordcount} \emph{The total number of words is: \the\wordcount.} \end{outputonly} \subsection{Syntax highlighting: TeX} At first I'd designed a colorful scheme but it was ugly, so here's something much more sober. We simply create an empty list in which we design a macro to add "\string"ed primitive commands. \begin{mainex} \SearchList{hilitex}{\bold{#1}}{} \def\Add#1{% \AddToList{hilitex}{#1}% } \expandafter\Add\expandafter{\string\def} \expandafter\Add\expandafter{\string\expandafter} \expandafter\Add\expandafter{\string\else} \expandafter\Add\expandafter{\string\fi} \expandafter\Add\expandafter{\string\else} \end{mainex} \noindent We can't do that for prefixes (and we need them if we want e.g. to underline all user-defined "\if"), because they would be "\string"ed and thus of category code 12, which \citex{example~\ref{catcode}} has shown was a trouble. So we design a macro to add words with a backslash added beforehand. And we use it. \begin{mainex} \def\gobble#1{} \def\AddPrefix#1{% \AddToList*{hilitex}{\expandafter\gobble\string\\#1?}% } \AddPrefix{new} \AddPrefix{if} \end{mainex} We need one last thing. We want "\" to be recognized as a letter, because it should be put in bold too. But we also want it to be recognized as a string boundary. The only solution is to make it active and let it expand to "\relax" (a natural string boundary) plus itself in catcode 12 (which is not defined with "\MakeBoundary" and is thus a letter for xesearch). \goodbreak \begin{mainex} \catcode`\|=0 \catcode`\\=13 |def\{|relax|string\} \end{mainex} \noindent If we pack everything into an usual macro to make verbatim text, then we obtain something along the lines of \citex{example~\ref{hilite}}. 
Don't forget the typewriter font for the real thrill! The implementation section of this documentation displays a subtler kind of syntax highlighting, viz. "\def" and associates put the following command in red and index it too, except commands I don't want to see treated as such, like temporary commands. However, the implementation depends on CodeDoc's macros, so I won't show it here, although you can look at the source. % % TeX example... \extitle{hilite}{\TeX\ Highlighted} \def\codefontspec{\fontspec{Courier New}\large} \bgroup \catcode`\|=0 \catcode`\\=13 |gdef|makeescape{% |catcode`|\=13 |gdef\{|relax|string\} } |egroup \def\CodeSpec{% \def\bold##1{% {\fontspec[FakeBold=1.5,Scale=.85]{Excalibur Monospace}##1}% }% \SearchList{hilitex}{\bold{##1}}{}% \def\Add##1{% \AddToList*{hilitex}{##1}% }% \expandafter\Add\expandafter{\string\def}% \expandafter\Add\expandafter{\string\expandafter}% \expandafter\Add\expandafter{\string\else}% \expandafter\Add\expandafter{\string\fi}% \expandafter\Add\expandafter{\string\else}% \def\gobble##1{}% \def\AddPrefix##1{% \expandafter\expandafter\expandafter\Add\expandafter\expandafter\expandafter{% \expandafter\gobble\string\\##1}% }% \AddPrefix{new?} \AddPrefix{if?}% }% \VerbCommand!() \begin{hilite} !makeescape()!CodeSpec() \def\mycommand#1{% \expandafter\myothercommand#1% \ifwhatever \newtoks\mytoks \mytoks={...}% \else \mytoks={...}% \fi } \end{hilite} \UndoVerbCommand \subsection{Syntax highlighting: HTML} Coloring HTML is rather easy. The most complicated part concerns word boundaries. xesearch is used to find elements and attributes. Only case-insensitive full words need to be searched for. \begin{mainex} \MakeBoundary{<>/=} \SearchList{elements}{\bold{\violet{#1}}} {html,meta,head,body,span,p,div,b,h1,img} \SearchList{attributes}{\bold{#1}}{align,class,style,src} \SearchOrder{f;} \end{mainex} \noindent "<" and ">" delimit markup, so we use them to switch xesearch on and off. 
\begin{mainex} \catcode`\<=13 \catcode`\>=13 \def<{\bgroup\catcode`\'=13\catcode`\"=13\char`\<\StartSearching{}} \def>{\egroup\char`\>} \end{mainex} \noindent Quoted text should not be searched, because values to attributes are simply put in blue. Double quotes and single quotes should exclude each other. \begin{mainex} \catcode`\"=13 \newif\ifdbbegin \def"{% \unless\ifsgbegin \ifdbbegin \egroup \char`\" \else \char`\" \bgroup \dbbegintrue \color{blue}\StopSearching \fi \fi } \catcode`\'=13 \newif\ifsgbegin \def'{% \unless\ifdbbegin \ifsgbegin \egroup \char`\' \else \char`\' \bgroup \sgbegintrue \color{blue}\StopSearching \fi \fi } \end{mainex} \noindent "src" and "href" take links as values, usually underlined. So we do just that. \begin{mainex} \SearchList!{links}{\makelink}{src,href} \def\makelink=#1{% \ifx#1" \expandafter\makedbqlink \else \expandafter\makesgqlink \fi } \def\makedbqlink#1"{\StopSearching="\underline{#1}"\StartSearching} \def\makesgqlink#1'{\StopSearching='\underline{#1}'\StartSearching} \end{mainex} \noindent The "&...;" character denotation is often in red. \begin{mainex} \catcode`\&=13 \def&#1;{% \char`\& \red{#1;}% } \end{mainex} \noindent Finally we turn off TeX's special characters (quotes are made active by "<" and ">"), and we make some useful adjustments. \begin{mainex} \catcode`\"=12 \catcode`\'=12 \catcode`\#=12 \catcode`\_=12 \catcode`\^=12 \catcode`\%=12 \obeylines \def\par{\leavevmode\endgraf} \parindent0pt \end{mainex} \citex{Example~\ref{html}} shows the bottom of the CTAN page. 
\bgroup \catcode`\&=13 \catcode`\"=13 \catcode`\'=13 \catcode`\<=13 \catcode`\>=13 \gdef\makehtmlchar{% \catcode`\&=13 \def&##1;{% \char`\& \bgroup\color{red}##1;\egroup }% \newif\ifdbbegin \def"{% \unless\ifsgbegin \ifdbbegin \egroup \char`\" \else \char`\" \bgroup \dbbegintrue \color{blue}% \StopSearching \fi \fi }% \newif\ifsgbegin \def'{% \unless\ifdbbegin \ifsgbegin \egroup \char`\' \else \char`\' \bgroup \sgbegintrue \color{blue}% \StopSearching \fi \fi }% \def<{\bgroup\catcode`\'=13\catcode`\"=13\char`\<\StartSearching{}}% \def>{\egroup\char`\>}% } \gdef\makehtml{% \makehtmlchar \MakeBoundary{<>/=}% \catcode`\<=13 \catcode`\>=13 \SearchList{elements}{\textbf{\textcolor{blue!50!red}{##1}}}{p,a,div,hr,table,tr,td,body,html,span}% \SearchList{attributes}{\textbf{##1}}{href,width,id,align}% \SearchList!{links}{\makelink}{src,href}% \SearchOrder{f;}% \def\makelink=##1{% \ifx##1" \expandafter\makedbqlink \else \expandafter\makesgqlink \fi } \def\makedbqlink##1"{\StopSearching="\underline{##1}"\StartSearching}% \def\makesgqlink##1'{\StopSearching='\underline{##1}'\StartSearching}% \catcode`\"=12 \catcode`\'=12 \catcode`\#=12 \catcode`\_=12 \catcode`\^=12 \catcode`\%=12 \obeylines \def\par{\leavevmode\endgraf}% \parindent0pt% \StopSearching }% \egroup \def\codefontspec{\ttfamily} \def\myskip{-.5em} \extitle{html}{Colorful HTML} \VerbCommand!() \begin{hilite} !makehtml()!color(black)!leftskip!myskip ...

A perhaps less taxing way to express your appreciation is to make a donation — small efforts add up!

\end{hilite} \UndoVerbCommand \framebreak \section{Implementation} \newif\ifdef \def\PrintMacro#1{} \def\csprefix{cs} \def\exprefix{ex} {\catcode`\#=12 \gdef\hash{#} \catcode`\%=12 \gdef\com{%} } \def\makedef#1#2#3{% \def\temp{#2#3}% \ifx\temp\csprefix \def\defnext{% #1#2#3% }% \else \ifx\temp\exprefix \def\defnext{% #1#2#3% }% \else \def\temp{#1}% \ifx\temp\hash \def\defnext{% #1#2#3% }% \else \ifx\temp\com \def\defnext{% #1#2#3% }% \else \deftrue \def\defnext{#2#3}% \fi \fi \fi \fi\defnext } \def\codesearch{% \StopList{logos}% \MakeBoundary{\\}% \MakeBoundary{\#}% \MakeBoundary{=}% \MakeBoundary{\%}% \SearchList{deff}{% \ifdef \deffalse \IgnorePrefix{xs@}\expandafter\DefineMacro\expandafter{\csname##1\endcsname}% \textcolor{red}{\char`\\##1}% \else ##1% \fi}{xs@?,a?,b?,c?,d?,e?,f?,g?,h?,i?,j?,k?,l?,m?,n?,o?,p?,q?,r?,s?,t?,u?,v?,w?,x?,y?,z?}% \SearchList{defff}{% \ifdef \deffalse \IgnorePrefix{xs@@}\expandafter\DefineMacro\expandafter{\csname##1\endcsname}% \textcolor{red}{\char`\\##1}% \else ##1% \fi}{xs@@?}% \SearchList*{nodeff}{\ifdef\deffalse\char`\\##1\else##1\fi} {xs@temp,xs@next,xs@String,xs@cs,xs@Phrase,xs@Sign,xs@TempWord,% xs@templist,xs@@String,xs@WhatNext,xs@NoReplace,xs@Delimiters,xs@Stack,% xs@ParseState,xs@TempNum,xs@TempDef,xs@Finalstring,xs@TempAffix,xs@@temp}% \SearchList!*{def}{\makedef}{?def,let}% \SearchOrder{F!Sp;S!p;p;}% \SortByLength{p}% \SearchOnlyOne{p}% } \RenewExample[continuous]{code}% {\ttfamily\small#}{}% {% \begin{parcolumns}[colwidths={1=10cm},distance=1.2cm]{2}% \parindent0pt% \colchunk{\Comment}% \codesearch \colchunk{\vskip-2\baselineskip\strut\par\CodeInput}% \end{parcolumns}% \gdef\Comment{}% } \LineNumber{code}{\rmfamily\scriptsize}{0pt} \catcode`\§13 \long\def§#1§{\def\Comment{\noindent#1}} \ShortCode/ \def\mac#1{\texttt{\bslash#1}} \subsection{First things first} §% First we look for XeTeX. § / \ifx\csname XeTeXrevision\endcsname\relax \errmessage{You need XeTeX to run xesearch. 
It won't be loaded.} \expandafter\endinput \else \expandafter\ifx\csname xs@ChangeCatcodes\endcsname\relax \else \expandafter\expandafter\expandafter\endinput \fi \fi / §% These will be used to keep a constant punctuation in spite of catcode-changing packages like \texttt{babel}. § / \catcode`@=11 \def\xs@ChangeCatcodes{% \chardef\xs@questioncode=\catcode`\?% \chardef\xs@exclamationcode=\catcode`\!% \chardef\xs@commacode=\catcode`\,% \chardef\xs@starcode=\catcode`\*% \chardef\xs@semicoloncode=\catcode`\;% \catcode`\?12 \catcode`\!12 \catcode`\,12 \catcode`\*12 \catcode`\;12 } \def\xs@RestoreCatcodes{% \catcode`\?\xs@questioncode \catcode`\!\xs@exclamationcode \catcode`\,\xs@commacode \catcode`\*\xs@starcode \catcode`\;\xs@semicoloncode } \xs@ChangeCatcodes / §% We declare xesearch as a package in LaTeX. § \CodeEscape! / \ifdefined\ProvidesPackage \def\xs@err#1{\PackageError{xesearch}{#1}{}} \ProvidesPackage{!FileName}[!FileDate!space !FileVersion!space Searching documents.] \else \def\MessageBreak{^^J} \def\xs@err#1{% \bgroup \newlinechar`\^^J% \errorcontextlines=0 \errmessage{xsearch error: #1}% \egroup } \fi / \UndoCodeEscape §% \mac{unexpanded} already exists in ConTeXt, and the meaning of the $\varepsilon$-TeX primitive is taken over by \mac{normalunexpanded}, so we have to make the proper adjustment (many thanks to Wolfgang Schuster, who signalled this to me). \mac{xs@contextmodule} is an empty command let to \mac{relax} when xesearch is loaded with ConTeXt. § / \ifcsname xs@contextmodule\endcsname \let\xs@unexpanded\normalunexpanded \else \let\xs@unexpanded\unexpanded \fi / §% Some keywords, indispensable macros, and a bunch of \mac{new} things. 
\mac{xs@end} expands to itself and only serves as an end marker, tested with \mac{ifx}; \mac{xs@Lowercase} defines its second argument as the lowercase version of its first.
§
/
\def\xs@end{\xs@end}
\def\xs@empty{}
\def\xs@star{*}
\def\xs@exclamation{!}
\def\xs@question{?}
\def\xs@starexclam{*!}
\def\xs@exclamstar{!*}
\def\xs@words{words}
\def\xs@prefixes{prefixes}
\def\xs@suffixes{suffixes}
\def\xs@gobble#1{}
\def\xs@Lowercase#1#2{\lowercase{\def#2{#1}}}
\let\xs@relax\relax
\newcount\xs@TempCount
\newcount\xs@CaseSensitive
\newcount\xs@TempLength
\newcount\xs@Length
\newbox\xs@Box
\newif\ifxs@Concatenate
\newif\ifxs@String
\newif\ifxs@Affix
\newif\ifxs@Prefix
\newif\ifxs@Suffix
\newif\ifxs@BadWord
\newif\ifxs@Star
\newif\ifxs@Phrase
\newif\ifxs@Match
\newtoks\xs@DefToks
\newtoks\xs@NoReplaceToks
/
\subsection{Character classes}
§%
Basic classes: natural delimiters (spaces and primitives), left and right delimiters (set by \mac{MakeBoundary}) and the normal class, out of which letters and delimiters will be taken. \mac{xs@NatDel}, \mac{xs@lrDel} and \mac{xs@Classes} are counted down from \mac{e@alloc@intercharclass@top}; \mac{xs@Classless} is class 0, and a character of that class met after a delimiter triggers \mac{xs@LearnLetter}. \mac{xs@TempCount} starts at \mac{xs@Classes} and holds the class that the next new letter will receive.
§
/
\chardef\xs@NatDel=\e@alloc@intercharclass@top
\chardef\xs@lrDel=\numexpr\e@alloc@intercharclass@top-1\relax
\chardef\xs@Classes=\numexpr\e@alloc@intercharclass@top-2\relax
\chardef\xs@Classless=0
\XeTeXinterchartoks\xs@lrDel\xs@Classless={\xs@LearnLetter}
\XeTeXinterchartoks\xs@NatDel\xs@Classless={\xs@LearnLetter}
\XeTeXinterchartoks\xs@NatDel\xs@lrDel{\xs@EndString}
\xs@TempCount\xs@Classes
/
§%
This is how we make boundaries. Note that if the character has a character class of 7 or 8, we don't change it. The interchartoks will be modified, however.
\mac{xs@MakeDel} goes through its argument one token at a time until it meets \mac{xs@end}; each character receives the class \mac{xs@lrDel} and is appended to \mac{xs@Delimiters}, unless it already has one of the two classes tested in the macro. \mac{xs@UndoBoundary} works the other way round: if the character has class \mac{xs@lrDel} it is removed from \mac{xs@Delimiters}, and in any case its class is reset to 0.
§
/
\def\xs@Delimiters{}
\def\xs@MakeDel#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \let\xs@next\xs@MakeDel
    \unless\ifnum\the\XeTeXcharclass`#1=7
      \unless\ifnum\the\XeTeXcharclass`#1=8
        \XeTeXcharclass`#1=\xs@lrDel
        \expandafter\def\expandafter\xs@Delimiters\expandafter{\xs@Delimiters#1}%
      \fi
    \fi
  \fi\xs@next}
\xs@MakeDel\{\}.,;:!?[()]-'`\xs@end
\def\MakeBoundary#1{%
  \xs@MakeDel#1\xs@end
  }
\def\UndoBoundary#1{%
  \xs@UndoBoundary#1\xs@end
  }
\def\xs@UndoBoundary#1{%
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \let\xs@next\relax
  \else
    \ifnum\the\XeTeXcharclass`#1=\xs@lrDel
      \def\xs@RemoveFromDelimiters##1#1##2\xs@end{%
        \def\xs@Delimiters{##1##2}%
        }%
      \expandafter\xs@RemoveFromDelimiters\xs@Delimiters\xs@end
    \fi
    \XeTeXcharclass`#1=0
    \let\xs@next\xs@UndoBoundary
  \fi\xs@next
  }
/
§%
This is the macro that turns a letter into a letter recording itself. It is recursive. Each new letter is assigned a new character class (from \mac{xs@Classes} downward), then it is made to start the recording process after delimiters, to stop it before, and to add itself to \mac{xs@String} in both cases or next to another letter. Before natural delimiters, however, if the word recorded up to now is part of a possible phrase, the process is not stopped. The \textsf{polyglossia} patch is needed when e.g. \texttt{?} is not turned into a \mac{xs@lrDel} but keeps its character class as defined by \textsf{polyglossia}.
\mac{xs@Letters} keeps the list of all letters created so far, and the letter of each class is stored in a macro named after the class number. The patch is built with \mac{edef} so that the tokens already in place for classes 7 and 8 are copied with \mac{the} next to those added here; after a character of class 8 the recording is started with \mac{xs@StartString}, as after the other delimiters.
§
/
\def\xs@Letters{}%
\def\xs@CreateLetter#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \expandafter\def\expandafter\xs@Letters\expandafter{\xs@Letters#1}%
    \XeTeXcharclass`#1=\xs@TempCount
    \expandafter\def\csname\the\xs@TempCount @xstring@letter\endcsname{#1}%
    \edef\xs@PolyglossiaPatch{%
      \xs@unexpanded{\XeTeXinterchartoks\xs@TempCount7}{%
        \xs@unexpanded{\xdef\xs@String{\xs@String#1}\xs@EndString}%
        \the\XeTeXinterchartoks0 7}%
      \xs@unexpanded{\XeTeXinterchartoks\xs@TempCount8}{%
        \xs@unexpanded{\xdef\xs@String{\xs@String#1}\xs@EndString}%
        \the\XeTeXinterchartoks0 8}%
      \xs@unexpanded{\XeTeXinterchartoks8\xs@TempCount}{%
        \the\XeTeXinterchartoks8 0 \xs@unexpanded{\xs@StartString}}%
      }%
    \xs@PolyglossiaPatch
    \XeTeXinterchartoks\xs@TempCount\xs@Classless{%
      \xdef\xs@String{\xs@String#1}%
      \xs@LearnLetter}%
    \XeTeXinterchartoks\xs@lrDel\xs@TempCount{%
      \xs@StopTracing
      \xs@StartString
      }%
    \XeTeXinterchartoks\xs@NatDel\xs@TempCount{%
      \xs@StopTracing
      \xs@StartString
      }%
    \XeTeXinterchartoks\xs@TempCount\xs@lrDel{%
      \xdef\xs@String{\xs@String#1}\xs@EndString}%
    \XeTeXinterchartoks\xs@TempCount\xs@NatDel{%
      \xdef\xs@String{\xs@String#1}%
      \ifcsname\xs@String @xs@phrases@cs\endcsname
        \XeTeXinterchartokenstate0
        \xdef\xs@Stack{%
          \xs@String\noexpand\xs@end\xs@unexpanded\expandafter{\xs@Stack}%
          }%
        \edef\xs@String{\xs@unexpanded\expandafter{\xs@String} }%
        \XeTeXinterchartokenstate1
      \else
        \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString
        \ifcsname\xs@lcString @xs@phrases@ncs\endcsname
          \XeTeXinterchartokenstate0
          \xdef\xs@Stack{%
            \xs@String\noexpand\xs@end\xs@unexpanded\expandafter{\xs@Stack}%
            }%
          \edef\xs@String{\xs@unexpanded\expandafter{\xs@String} }%
          \XeTeXinterchartokenstate1
        \else
          \expandafter\expandafter\expandafter\xs@EndString
        \fi
      \fi
      }%
    \xs@TempCount\xs@Classes
    \xs@MakeInterCharToks#1%
    \advance\xs@TempCount-1
    \let\xs@next\xs@CreateLetter
  \fi\xs@next
  }
/
§ This is the recursive macro which creates the \mac{XeTeXinterchartoks} for the new letter and all existing letters.
\mac{xs@TempCount} runs from \mac{xs@Classes} down to the class of the new letter. At each step \mac{xs@Xict} sets the tokens in both directions between the new letter and the letter stored for the current class, so that the letter on the left adds itself to \mac{xs@String}; the recursion ends with the tokens between the new letter and itself.
§
/
\def\xs@MakeInterCharToks#1{%
  \ifnum\xs@TempCount=\XeTeXcharclass`#1
    \XeTeXinterchartoks\xs@TempCount\xs@TempCount{\xdef\xs@String{\xs@String#1}}%
    \let\xs@next\relax
  \else\let\xs@next\relax
    \expandafter\expandafter\expandafter%
    \xs@Xict\csname\the\xs@TempCount @xstring@letter\endcsname%
      \xs@TempCount{\XeTeXcharclass`#1}%
    \xs@Xict#1{\XeTeXcharclass`#1}\xs@TempCount
    \advance\xs@TempCount-1
    \def\xs@next{\xs@MakeInterCharToks#1}%
  \fi\xs@next}
\def\xs@Xict#1#2#3{%
  \XeTeXinterchartoks#2#3{\xdef\xs@String{\xs@String#1}}%
  }
/
§%
xesearch learns a letter when it encounters a character with character class 0. Since \mac{xs@CreateLetter} is local, and since it is often executed inside the word box (see \mac{xs@StartString}), we record the letters thus created in \mac{xs@PendingLetters} and create them for good after the group. \mac{xs@LearnLetter} puts the character it has taken as argument back into the input once it is done.
§
/
\def\xs@PendingLetters{}%
\def\xs@LearnLetter#1{%
  \xs@CreateLetter#1\xs@end
  \ifxs@String
    \xdef\xs@PendingLetters{\xs@PendingLetters#1}%
  \fi
  #1}
/
\subsection{Search lists}
§%
First we determine whether there's an \texttt{!} or a \texttt{*} or both. \mac{xs@StarOrExclam} takes the macro to execute next as its first argument and everything up to the first opening brace as its second; \mac{xs@CaseSensitive} is set to 2 if there is a star and to 0 otherwise, and \mac{ifxs@Concatenate} is true if there is an exclamation mark.
§
/
\def\SearchList{%
  \xs@ChangeCatcodes
  \xs@StarOrExclam\xs@Search
  }
\def\xs@StarOrExclam#1#2#{%
  \def\xs@temp{#2}%
  \ifx\xs@temp\xs@star
    \xs@CaseSensitive2
    \xs@Concatenatefalse
  \else
    \ifx\xs@temp\xs@exclamation
      \xs@CaseSensitive0
      \xs@Concatenatetrue
    \else
      \ifx\xs@temp\xs@starexclam
        \xs@CaseSensitive2
        \xs@Concatenatetrue
      \else
        \ifx\xs@temp\xs@exclamstar
          \xs@CaseSensitive2
          \xs@Concatenatetrue
        \else
          \xs@CaseSensitive0
          \xs@Concatenatefalse
        \fi
      \fi
    \fi
  \fi#1%
  }
/
§%
Then, after a basic check on the name of the list, we record it and define the macros associated with this list as the second argument; these macros are the normal and !-marked (`\texttt{noreplace}') versions (both are created because there might be an \mac{AddToList} of a different type). Finally we launch the word-maker on the list of words. \mac{AddToList} is equivalent with some adjustments.
In both macros the category codes changed by \mac{xs@ChangeCatcodes} are restored once, at the very end, whether the list has been processed or an error has been raised.
§
/
\def\xs@Search#1#2#3{%
  \ifcsname#1@xs@searchlist\endcsname
    \xs@err{%
      `#1' already exists.\MessageBreak
      Use \string\AddToList{#1}{} to add words to it%
      }%
  \else
    \def\xs@ListName{#1}%
    \expandafter\def\csname\xs@ListName @words\endcsname{}%
    \expandafter\def\csname #1@xs@searchlist\endcsname##1{#2}%
    \expandafter\def\csname #1@xs@searchlist@noreplace\endcsname##1{#2}%
    \expandafter\xs@MakeWord#3,\xs@end,%
  \fi
  \xs@RestoreCatcodes
  }
\def\AddToList{%
  \xs@ChangeCatcodes
  \xs@StarOrExclam\xs@AddToList
  }
\def\xs@AddToList#1#2{%
  \ifcsname#1@xs@searchlist\endcsname
    \def\xs@ListName{#1}%
    \expandafter\xs@MakeWord#2,\xs@end,%
  \else
    \xs@err{`#1' is not a list}%
  \fi
  \xs@RestoreCatcodes
  }
/
§%
This takes each word one by one and checks and creates a few things. The words are delimited by commas and the list ends with \mac{xs@end}; a word already specified in the list is refused with an error.
§
/
\def\xs@MakeWord#1,{%
  \def\xs@TempWord{#1}%
  \ifx\xs@TempWord\xs@end
    \let\xs@next\relax
  \else
    \ifcsname\ifnum\xs@CaseSensitive=2*\fi#1@\xs@ListName\endcsname
      \xs@err{You have already specified `\ifnum\xs@CaseSensitive=2*\fi#1'
        in `\xs@ListName'. \MessageBreak You can't do it twice}%
    \else
      \csname#1@\xs@ListName\endcsname
      \edef\xs@TempWord{#1}%
      \chardef\xs@ParseState=0
      \xs@BadWordfalse
      \xs@Starfalse
      \xs@Prefixfalse
      \xs@Suffixfalse
/
§%
First, we parse the word, to find prefixes or suffixes or forbidden things, like control sequences. Then we suppress prefixes and suffixes.
§
/
      \xs@ParseWord#1\xs@end
      \unless\ifxs@BadWord
      \ifxs@Star
        \xs@CaseSensitive1
        \expandafter\xs@SuppressPrefix\xs@TempWord\xs@end
      \fi
      \ifxs@Prefix
        \expandafter\xs@SuppressSuffix\xs@TempWord
      \else
        \ifxs@Suffix
          \expandafter\xs@SuppressPrefix\xs@TempWord\xs@end
        \fi
      \fi
/
§%
Depending on case-sensitivity, we put the word in lowercase or not, and we define a keyword to record the case-sensitivity. A value of 1 for \mac{xs@CaseSensitive} comes from a star on the word itself and is reset to 0 afterwards, whereas 2 comes from a starred list and is kept for the following words.
§
/
      \def\xs@Phrase{}%
      \ifcase\xs@CaseSensitive
        \expandafter\xs@Lowercase\expandafter{\xs@TempWord}\xs@TempWord
        \def\xs@cs{ncs}%
        \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
      \or
        \def\xs@cs{cs}%
        \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
        \xs@CaseSensitive0
      \or
        \def\xs@cs{cs}%
        \expandafter\xs@CheckSpaces\xs@TempWord\xs@end
      \fi
/
§%
Finally, we patch the replacement texts associated with this word or affix.
§
/
      \ifxs@Prefix
        \xs@MakePrefix
        \def\xs@WordType{prefixes}%
        \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
      \else
        \ifxs@Suffix
          \xs@MakeSuffix
          \def\xs@WordType{suffixes}%
          \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
        \else
          \def\xs@WordType{words}%
          \expandafter\xs@PatchDef\csname\xs@ListName @xs@searchlist\endcsname
        \fi
      \fi
      \fi
    \fi
    \let\xs@next\xs@MakeWord
  \fi\xs@next
  }
/
§%
This is a basic finite state automaton. It starts in state 0. A star brings it into state 1. In both 0 and 1, if it finds a letter or a \texttt{?} it goes into state 2. From there, only letters and a \texttt{?} at the very end of the word are allowed. Boundaries make it crash. The distinction between state 0 and state 1 is needed just in case the user defines the star as a boundary.
§ / \def\xs@ParseWord#1{% \def\xs@temp{#1}% \ifx\xs@temp\xs@end \let\xs@next\relax \ifxs@Suffix \ifnum\xs@ParseState=3 \xs@err{You can't have a prefix and a suffix in the same word.\MessageBreak `\xs@unexpanded\expandafter{\xs@TempWord}' won't be searched}% \xs@BadWordtrue \fi \fi \else \let\xs@next\xs@ParseWord \expandafter\ifcat\noexpand#1\relax \xs@BadChar#1{control sequences are forbidden}% \else \ifcase\xs@ParseState \chardef\xs@TempNum=\XeTeXcharclass`#1 % \ifx\xs@temp\xs@star \xs@Startrue \chardef\xs@ParseState=1 \let\xs@next\xs@ParseWord \else \ifx\xs@temp\xs@question \xs@Suffixtrue \chardef\xs@ParseState=2 \let\xs@next\xs@ParseWord \else \ifnum\xs@TempNum>\xs@Classes \xs@BadChar#1{it's already a string delimiter}% \else \chardef\xs@ParseState=2 \ifnum\xs@TempNum=0 \xs@CreateLetter#1\xs@end \let\xs@next\xs@ParseWord \fi \fi \fi \fi % \or \chardef\xs@ParseState=2 \chardef\xs@TempNum=\XeTeXcharclass`#1 % \let\xs@next\xs@ParseWord \ifx\xs@temp\xs@question \xs@Suffixtrue \else \ifnum\xs@TempNum>\xs@Classes \xs@BadChar#1{it's already a string delimiter}% \else \ifnum\xs@TempNum=0 \xs@CreateLetter#1\xs@end \let\xs@next\xs@ParseWord \fi \fi \fi % \or \let\xs@next\xs@ParseWord \chardef\xs@TempNum=\XeTeXcharclass`#1 % \ifx\xs@temp\xs@question \xs@Prefixtrue \chardef\xs@ParseState=3 \else \ifnum\xs@TempNum>\xs@Classes \xs@BadChar#1{it's already a string delimiter}% \else \let\xs@next\xs@ParseWord \fi \fi \or \xs@BadChar?{it's already a string delimiter}% \fi \fi \fi\xs@next } / §% This is in case we find something we don't want in the word. § / \def\xs@BadChar#1#2{% \def\xs@next##1\xs@end{}% \xs@BadWordtrue \xs@err{% You can't use `\noexpand#1' in `\xs@unexpanded\expandafter{\xs@TempWord}',\MessageBreak #2.\MessageBreak `\xs@unexpanded\expandafter{\xs@TempWord}' won't be searched }% } / §% In case the word is a phrase, we have to know that, so we check spaces. 
In case there are some, we record \texttt{word1}, then \texttt{word1 word2}, then \texttt{word1 word2 word3}, etc., as strings that may lead to phrases and should be recognized as such when xesearch is searching. § / \def\xs@CheckSpaces#1\xs@end{% \xs@@CheckSpaces#1 \xs@end } \def\xs@@CheckSpaces#1 #2\xs@end{% \def\xs@temp{#2}% \ifx\xs@temp\xs@empty \let\xs@next\relax \else \expandafter\xs@MakePhrase\xs@Phrase\xs@end#1\xs@end \def\xs@next{\xs@@CheckSpaces#2\xs@end}% \fi\xs@next } \def\xs@MakePhrase#1\xs@end#2\xs@end{% \ifx\xs@Phrase\xs@empty \expandafter\def\csname#2@xs@phrases@\xs@cs\endcsname{}% \edef\xs@Phrase{#2}% \else \expandafter\def\csname#1 #2@xs@phrases@\xs@cs\endcsname{}% \edef\xs@Phrase{#1 #2}% \fi }% / §% In case the word was recognized as an affix, we add it to the list of affixes beginning (in the case of prefixes) or ending (in the case of suffixes) with a given letter (this is supposed to make xesearch faster: when xesearch scans a word, it searches e.g. prefixes if and only if there are prefixes with the same initial letter as the word under investigation, and it compares it to those words only). The affix is also added to the lists sorted by length in both orders. 
§ / \def\xs@GetFirstLetter#1#2\xs@end{% \def\xs@FirstLetter{#1}% } \def\xs@MakePrefix{% \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@prefixes\endcsname\relax \expandafter\xs@GetFirstLetter\xs@TempWord\xs@end \ifcsname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname{% \csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname\xs@TempWord,}% \def\xs@Sign{<}% \xs@Insert{\xs@TempWord}{\csname xs@prefixes@\xs@FirstLetter @\xs@cs @longer\endcsname}% \def\xs@Sign{>}% \xs@Insert{\xs@TempWord}{\csname xs@prefixes@\xs@FirstLetter @\xs@cs @shorter\endcsname}% \else \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs\endcsname{\xs@TempWord,}% \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs @longer\endcsname{\xs@TempWord,}% \expandafter\edef\csname xs@prefixes@\xs@FirstLetter @\xs@cs @shorter\endcsname{\xs@TempWord,}% \fi \fi } \def\xs@GetLastLetter#1{% \ifx#1\xs@end \let\xs@next\relax \else \let\xs@next\xs@GetLastLetter \def\xs@LastLetter{#1}% \fi\xs@next } \def\xs@MakeSuffix{% \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@suffixes\endcsname\relax \expandafter\xs@GetLastLetter\xs@TempWord\xs@end \ifcsname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname{% \csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname\xs@TempWord,}% \def\xs@Sign{<}% \xs@Insert{\xs@TempWord}{\csname xs@suffixes@\xs@LastLetter @\xs@cs @longer\endcsname}% \def\xs@Sign{>}% \xs@Insert{\xs@TempWord}{\csname xs@suffixes@\xs@LastLetter @\xs@cs @shorter\endcsname}% \else \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs\endcsname{\xs@TempWord,}% \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs @longer\endcsname{\xs@TempWord,}% \expandafter\edef\csname xs@suffixes@\xs@LastLetter @\xs@cs @shorter\endcsname{\xs@TempWord,}% \fi \fi } / §% These suppress the \texttt{?} at the beginning or the end of the word. 
§
/
\def\xs@SuppressPrefix#1#2\xs@end{\def\xs@TempWord{#2}}
\def\xs@SuppressSuffix#1?{\def\xs@TempWord{#1}}
/
§%
Here's how we sort the list: we check each affix, and we insert the affix to be added just before the first affix that is shorter or longer, depending on the order.
Since an entry of the list may contain several letters, the end of the list is detected by comparing a macro holding the whole entry with \mac{xs@end}, not by testing the entry directly.
§
/
\def\xs@CountLetter#1{%
  \ifx#1\xs@end
    \let\xs@next\relax
  \else
    \advance\xs@Length1
    \let\xs@next\xs@CountLetter
  \fi\xs@next
  }
\def\xs@SortList#1,{%
  % Compare the whole entry, not its first two letters.
  \def\xs@temp{#1}%
  \ifx\xs@temp\xs@end
    \edef\xs@templist{\xs@templist\xs@TempAffix,}%
    \let\xs@next\relax
  \else
    \xs@Length0
    \xs@CountLetter#1\xs@end
    \ifnum\xs@Length\xs@Sign\xs@AffixLength
      \edef\xs@templist{\xs@templist\xs@TempAffix,#1,}%
      \let\xs@next\xs@EndList
    \else
      \edef\xs@templist{\xs@templist#1,}%
      \let\xs@next\xs@SortList
    \fi
  \fi\xs@next
  }
\def\xs@EndList#1\xs@end,{%
  \edef\xs@templist{\xs@templist#1}%
  }
\def\xs@Insert#1#2{%
  \def\xs@TempAffix{#1}%
  \xs@Length0
  \expandafter\xs@CountLetter#1\xs@end
  \chardef\xs@AffixLength\xs@Length
  \def\xs@templist{}%
  % No space may follow the final comma, lest it end up in the paragraph.
  \expandafter\expandafter\expandafter\xs@SortList#2\xs@end,%
  \expandafter\let#2\xs@templist
  }
/
§%
Finally, we make the definition of the word. First, we associate it with the word, so we'll know which words to modify in case of a \mac{StopList}, and to which type it belongs (case-sensitivity, affix or full word, \texttt{!}-marked or not). Then we make both the normal replacement text and the `no-repla\-cement' replacement text.
§ / \def\xs@PatchDef#1{% \expandafter\edef\csname\xs@ListName @words\endcsname{% \csname\xs@ListName @words\endcsname \xs@TempWord:::\xs@cs:::\xs@WordType:::\ifxs@Concatenate!\fi:::% }% \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname\relax% \xs@DefToks{\xs@FinalString}% \else \xs@DefToks\expandafter\expandafter\expandafter{% \csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname}% \fi \expandafter\ifx\csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname\relax \xs@NoReplaceToks{}% \else \xs@NoReplaceToks\expandafter\expandafter\expandafter{% \csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname}% \fi \ifxs@Concatenate \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname{\the\xs@DefToks}% \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType @noreplace\endcsname{% \the\xs@NoReplaceToks \xs@unexpanded{\expandafter#1\expandafter{\xs@String}}% }% \else \expandafter\edef\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname{% \noexpand\expandafter\noexpand#1\noexpand\expandafter{\the\xs@DefToks}% }% \fi } / §% Stopping a list is a delicate process: we have to extract the definition associated with the list from the words where it appears, and it is nested in case it is not \texttt{!}-marked. § / \def\StopList{% \xs@ChangeCatcodes \xs@StopList } \def\xs@StopList#1{% \xs@@StopList#1,\xs@end,% \xs@RestoreCatcodes } \def\xs@@StopList#1,{% \def\xs@temp{#1}% \ifx\xs@temp\xs@end \let\xs@next\relax \else \ifcsname#1@xs@searchlist\endcsname \unless\ifcsname#1@xs@stoppedlist\endcsname \csname#1@xs@stoppedlist\endcsname \expandafter\def\expandafter\xs@ToRemove\expandafter{% \csname#1@xs@searchlist\endcsname }% \expandafter\expandafter\expandafter% \xs@PatchWords\csname #1@words\endcsname\xs@end::::::::::::% \fi \else \xs@err{`#1' is not a list}% \fi \let\xs@next\xs@@StopList \fi\xs@next } / §% We modify the adequate replacement text: no-replace or normal. 
§ / \def\xs@PatchWords#1:::#2:::#3:::#4:::{% \def\xs@TempWord{#1}% \ifx\xs@TempWord\xs@end \let\xs@next\relax \else \def\xs@temp{#4}% \ifx\xs@temp\xs@exclamation \expandafter\expandafter\expandafter% \xs@RemoveFromNoReplace\expandafter\xs@ToRemove\csname#1@#2@xs@#3@noreplace\endcsname \fi \def\xs@cs{#2}% \def\xs@WordType{#3}% \expandafter\xs@RemoveFromDef\csname#1@#2@xs@#3\endcsname \let\xs@next\xs@PatchWords \fi\xs@next } / §% Removing from no-replace is rather easy, since it's nothing more than: \par\noindent \mac{expandafter}\mac{\ttslant{}}\mac{expandafter}\{\mac{xs@String}\} \par\noindent \mac{expandafter}\mac{\ttslant{}}\mac{expandafter}\{\mac{xs@String}\} \par\noindent \mac{expandafter}\mac{\ttslant{}}\mac{expandafter}\{\mac{xs@String}\} \par\noindent So we define a macro on the fly to find the definition we want to remove. If there's nothing left, we let this no-replace to \mac{relax}, so this word might be removed altogether when we evaluate what we find. § / \def\xs@RemoveFromNoReplace#1#2{% \def\xs@Erase##1\expandafter#1\expandafter##2##3\xs@end{% \def#2{##1##3}% \ifx#2\xs@empty \let#2\relax \fi }% \expandafter\xs@Erase#2\xs@end } / §% Normal replacement texts have the following structure: \par\noindent \mac{expandafter}\mac{\ttslant{}}\mac{expandafter}\{\par\noindent \strut{} \strut{} \mac{expandafter}\mac{\ttslant{}}\mac{expandafter}\{\par\noindent \strut{} \strut{} \strut{} \strut{} ...\par\noindent \strut{} \strut{} \strut{} \strut{}\strut{} \strut{} \mac{xs@FinalString}\par\noindent \strut{} \strut{} \strut{} \strut{} ...\par\noindent \strut{} \strut{} \}\} \par\noindent So we scan this recursively and rebuild it piecewise, removing the list that was stopped. If in the end there remains \mac{xs@FinalString} only, then there's no replacement text anymore, and if moreover the no-replace part is equal to \mac{relax}, then there's nothing left for that word and it shouldn't be tested anymore. 
So we let the definition associated with this word to \mac{relax} or we remove it from affixes. § / \def\xs@final{\xs@FinalString} \def\xs@TempDef{} \def\xs@RemoveFromDef#1{% \def\xs@TempDef{}% \def\xs@Def{\xs@FinalString}% \unless\ifx#1\xs@final \expandafter\xs@Extract#1% \fi \let#1\xs@Def \ifx#1\xs@final \expandafter\ifx\csname\expandafter\xs@gobble\string#1@noreplace\endcsname\relax \ifx\xs@WordType\xs@words \let#1\relax \else \xs@RemoveFromAffixes \fi \fi \fi } \def\xs@Extract\expandafter#1\expandafter#2{% \def\xs@temp{#1}% \unless\ifx\xs@temp\xs@ToRemove \edef\xs@TempDef{% \noexpand#1,% \xs@unexpanded\expandafter{\xs@TempDef}% }% \fi \def\xs@temp{#2}% \ifx\xs@temp\xs@final \def\xs@next{% \expandafter\xs@Rebuild\xs@TempDef\xs@end,% }% \else \def\xs@next{% \xs@Extract#2% }% \fi\xs@next } \def\xs@Rebuild#1,{% \ifx#1\xs@end \let\xs@next\relax \else \let\xs@next\xs@Rebuild \edef\xs@Def{% \xs@unexpanded{\expandafter#1\expandafter}% \noexpand{% \xs@unexpanded\expandafter{\xs@Def}% \noexpand}% }% \fi\xs@next }% / §% Removing an affix from a list is easy: we scan each word and rebuild the list, removing the affix we want to deactivate. 
§ / \def\xs@RemoveFromAffixes{% \ifx\xs@WordType\xs@prefixes \expandafter\xs@GetFirstLetter\xs@TempWord\xs@end \let\xs@Letter\xs@FirstLetter \else \expandafter\xs@GetLastLetter\xs@TempWord\xs@end \let\xs@Letter\xs@LastLetter \fi \def\xs@templist{}% \expandafter\expandafter\expandafter% \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs\endcsname\xs@end,% \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs\endcsname\xs@templist \def\xs@templist{}% \expandafter\expandafter\expandafter% \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs @shorter\endcsname\xs@end,% \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs @shorter\endcsname\xs@templist \def\xs@templist{}% \expandafter\expandafter\expandafter% \xs@CleanList\csname xs@\xs@WordType @\xs@Letter @\xs@cs @longer\endcsname\xs@end,% \expandafter\let\csname xs@\xs@WordType @\xs@Letter @\xs@cs @longer\endcsname\xs@templist \expandafter\let\csname\xs@TempWord @\xs@cs @xs@\xs@WordType\endcsname\relax } \def\xs@CleanList#1,{% \def\xs@temp{#1}% \ifx\xs@temp\xs@end \let\xs@next\relax \else \let\xs@next\xs@CleanList \unless\ifx\xs@temp\xs@TempWord \edef\xs@templist{\xs@templist#1,}% \fi \fi\xs@next } / \subsection{Testing words} §% Here comes the big part: collecting words and testing them. When a letter follows a delimiter, we reset some values and start collecting the letters in a box... § / \def\xs@Stack{} \def\xs@Remainder{} \def\xs@StartString{% \xs@Stringtrue \let\xs@StartString\relax \def\xs@String{}% \def\PrefixFound{}% \def\SuffixFound{}% \def\AffixFound{}% \def\xs@Stack{}% \def\xs@Remainder{}% \xs@Phrasefalse \setbox\xs@Box=\hbox\bgroup } \let\xs@@StartString\xs@StartString / §% \noindent ...and when a delimiter shows up again, unless we're tracking a phrase, we close the box, create the unknown letters that we've found in it, evaluate the word and finally output the result of this evaluation. 
§ / \def\xs@EndString{% \ifxs@String \egroup \xs@Stringfalse \expandafter\xs@CreateLetter\xs@PendingLetters\xs@end \gdef\xs@PendingLetters{}% \xs@Evaluate \xs@Restore \xs@StartTracing \expandafter\xs@Remainder \fi } / §% And here are the tests. The \texttt{F} test is for case-sensitive full words and just checks whether there is a definition for this word in this case. If it finds anything, it puts it around the string that already exists, i.e. either the bare word or the word already surrounded by replacement texts. Hence the bunch of \mac{expandafter}s. If there's a no-replace, we also add it to the existing ones. \mac{xs@relax} is just a placeholder to add the inhibitions defined with \mac{SearchOrder}. § / \def\xs@@F@Test{% \expandafter\unless\expandafter\ifx\csname\xs@String @cs@xs@words\endcsname\relax \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \def% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \xs@FinalString% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{% \csname\xs@String @cs@xs@words\endcsname}% \expandafter\unless\expandafter\ifx\csname\xs@String @cs@xs@words@noreplace\endcsname\relax \edef\xs@NoReplace{% \xs@unexpanded\expandafter{\xs@NoReplace}% \xs@unexpanded\expandafter{\csname\xs@String @cs@xs@words@noreplace\endcsname}% }% \fi \xs@Matchtrue \xs@relax \xs@relax \fi } / §% The \texttt{f} test does the same thing, except it puts the word in lowercase beforehand.
§ \begin{code*} \def\xs@@f@Test{% \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString \expandafter\unless\expandafter\ifx\csname\xs@lcString @ncs@xs@words\endcsname\relax \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \def% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \xs@FinalString% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{% \csname\xs@lcString @ncs@xs@words\endcsname}% \expandafter\unless\expandafter\ifx\csname\xs@lcString @ncs@xs@words@noreplace\endcsname\relax \edef\xs@NoReplace{% \xs@unexpanded\expandafter{\xs@NoReplace}% \xs@unexpanded\expandafter{\csname\xs@lcString @ncs@xs@words@noreplace\endcsname}% }% \fi \xs@Matchtrue \xs@relax \xs@relax \fi } \end{code*} §% Tests on prefixes check whether there exists a prefix list beginning with the same letter as the word at stake, and in this case run the \mac{xs@CheckPrefixes} test. § / \def\xs@@p@Test{% \xs@Affixfalse \expandafter\xs@GetFirstLetter\xs@lcString\xs@end \ifcsname xs@prefixes@\xs@FirstLetter @ncs\endcsname \let\xs@@String\xs@lcString \def\xs@cs{ncs}% \let\xs@WhatNext\xs@p@WhatNext \expandafter\expandafter\expandafter% \xs@CheckPrefixes\csname xs@prefixes@\xs@FirstLetter @ncs\p@order\endcsname\xs@end,% \fi \ifxs@Affix \xs@Affixfalse \xs@Matchtrue \xs@relax \xs@relax \fi } \def\xs@@P@Test{% \xs@Affixfalse \expandafter\xs@GetFirstLetter\xs@String\xs@end \ifcsname xs@prefixes@\xs@FirstLetter @cs\endcsname \let\xs@@String\xs@String \def\xs@cs{cs}% \let\xs@WhatNext\xs@P@WhatNext \expandafter\expandafter\expandafter% \xs@CheckPrefixes\csname xs@prefixes@\xs@FirstLetter @cs\P@order\endcsname\xs@end,% \fi \ifxs@Affix \xs@Affixfalse \xs@Matchtrue \xs@relax \xs@relax \fi } / §% Prefixes are tested one by one by creating a macro on the fly where one delimiter is the prefix. 
Then we put the word at stake before it and execute the macro, and if there's no first argument, then the word matches the prefix. For instance, if the word is \texttt{democracy} and the prefix is \texttt{demo} then we test\par\noindent \mac{xs@TestPrefix democracydemo} \par\noindent and obviously the first argument is empty, since \texttt{demo} is a delimiter. § / \def\xs@CheckPrefixes#1,{% \def\xs@temp{#1}% \ifx\xs@temp\xs@end \let\xs@next\relax \else \def\xs@TestPrefix##1#1##2\xs@end{% \def\xs@temp{##1}% \ifx\xs@temp\xs@empty \xs@Affixtrue \def\PrefixFound{#1}% \def\AffixFound{#1}% \let\xs@next\xs@WhatNext \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \def% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \xs@FinalString% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter{% \csname#1@\xs@cs @xs@prefixes\endcsname}% \expandafter\unless\expandafter\ifx\csname#1@\xs@cs @xs@prefixes@noreplace\endcsname\relax \edef\xs@NoReplace{% \xs@unexpanded\expandafter{\xs@NoReplace}% \xs@unexpanded\expandafter{\csname#1@\xs@cs @xs@prefixes@noreplace\endcsname}% }% \fi \else \let\xs@next\xs@CheckPrefixes \fi }% \expandafter\xs@TestPrefix\xs@@String#1\xs@end \fi\xs@next } / §% The tests for suffixes work along the same lines as those for prefixes. 
§ / \def\xs@@S@Test{% \xs@Affixfalse \expandafter\xs@GetLastLetter\xs@String\xs@end \ifcsname xs@suffixes@\xs@LastLetter @cs\endcsname \let\xs@@String\xs@String \def\xs@cs{cs}% \let\xs@WhatNext\xs@S@WhatNext \expandafter\expandafter\expandafter% \xs@CheckSuffixes\csname xs@suffixes@\xs@LastLetter @cs\S@order\endcsname\xs@end,% \fi \ifxs@Affix \xs@Affixfalse \xs@Matchtrue \xs@relax \xs@relax \fi } \def\xs@@s@Test{% \xs@Affixfalse \expandafter\xs@GetLastLetter\xs@lcString\xs@end \ifcsname xs@suffixes@\xs@LastLetter @ncs\endcsname \let\xs@@String\xs@lcString \def\xs@cs{ncs}% \let\xs@WhatNext\xs@s@WhatNext \expandafter\expandafter\expandafter% \xs@CheckSuffixes\csname xs@suffixes@\xs@LastLetter @ncs\s@order\endcsname\xs@end,% \fi \ifxs@Affix \xs@Affixfalse \xs@Matchtrue \xs@relax \xs@relax \fi } \def\xs@CheckSuffixes#1,{% \def\xs@temp{#1}% \ifx\xs@temp\xs@end \let\xs@next\relax \else \def\xs@TestSuffix##1#1##2\xs@end{% \def\xs@@temp{##2}% \ifx\xs@temp\xs@@temp \xs@Affixtrue \def\SuffixFound{#1}% \def\AffixFound{#1}% \let\xs@next\xs@WhatNext \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \def% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% \xs@FinalString% \expandafter\expandafter\expandafter\expandafter\expandafter\expandafter\expandafter% {% \csname#1@\xs@cs @xs@suffixes\endcsname}% \expandafter\unless\expandafter\ifx\csname#1@\xs@cs @xs@suffixes@noreplace\endcsname\relax \edef\xs@NoReplace{% \xs@unexpanded\expandafter{\xs@NoReplace}% \xs@unexpanded\expandafter{\csname#1@\xs@cs @xs@suffixes@noreplace\endcsname}% }% \fi \else% \let\xs@next\xs@CheckSuffixes \fi }% \expandafter\xs@TestSuffix\xs@@String#1\xs@end \fi\xs@next } / \subsection{Search order} §% \mac{SearchOrder} actually defines \mac{xs@Evaluate}. First it adds inhibitions to the tests, e.g. 
`\texttt{F!f;}' adds \mac{let}\mac{xs@f@Test}\mac{relax} to the \texttt{F} test in case it is positive, then it adds the tests themselves, in the specified order, to \mac{xs@Evaluate}. § / \def\SearchOrder{% \xs@ChangeCatcodes \xs@SearchOrder } \def\xs@SearchOrder#1{% \def\xs@Order{}% \xs@@SearchOrder#1\xs@end;% \edef\xs@Evaluate{% \xs@unexpanded{% \XeTeXinterchartokenstate=0 \def\xs@NoReplace{}% \let\xs@FinalString\xs@String \expandafter\xs@Lowercase\expandafter{\xs@String}\xs@lcString }% \xs@unexpanded\expandafter{% \xs@Order \ifxs@Match \def\xs@next{% \xs@FinalString }% \else / §% If the stack is not empty, it means we're dealing with a phrase; so the evaluation is not over in case no test has succeeded. We first have to test the phrase minus the last word, then the phrase minus the last two words, etc. § / \unless\ifx\xs@Stack\xs@empty \xs@Phrasetrue \expandafter\xs@PopStack\xs@Stack\xs@@end \let\xs@next\xs@Evaluate \else \ifxs@Phrase \def\xs@Stack{}% \def\xs@next{\xs@String\xs@Restore}% \else / §% If the word was not a phrase, and no test was successful, we simply put the box that contains it back into the stream. § / \def\xs@next{\unhbox\xs@Box\xs@Restore}% \fi \fi \fi\xs@next }% }% / §% We initialize the tests. § / \let\xs@f@Test\xs@@f@Test \let\xs@F@Test\xs@@F@Test \let\xs@p@Test\xs@@p@Test \let\xs@P@Test\xs@@P@Test \let\xs@s@Test\xs@@s@Test \let\xs@S@Test\xs@@S@Test \xs@RestoreCatcodes } / §% This treats each specification in \mac{SearchOrder} and the inhibitions, if any.
§ / \def\xs@@SearchOrder#1#2;{% \def\xs@temp{#1#2}% \ifx#1\xs@end \let\xs@next\relax \else \def\xs@Inhibit{}% \xs@MakeInhibit#2\xs@end \expandafter\expandafter\expandafter\xs@PatchTest\csname xs@@#1@Test\endcsname#1% \edef\xs@Order{% \xs@unexpanded\expandafter{\xs@Order}% \xs@unexpanded\expandafter{\csname xs@#1@Test\endcsname}}% \let\xs@next\xs@@SearchOrder \fi\xs@next } \def\xs@MakeInhibit#1{% \def\xs@temp{#1}% \ifx#1\xs@end \let\xs@next\relax \else \let\xs@next\xs@MakeInhibit \unless\ifx\xs@temp\xs@exclamation% \edef\xs@Inhibit{% \xs@unexpanded\expandafter{\xs@Inhibit \expandafter\let\csname xs@#1@Test\endcsname\relax}% }% \fi \fi\xs@next } \def\xs@PatchTest#1\xs@relax#2\xs@relax#3#4{% \expandafter\edef\csname xs@@#4@Test\endcsname{% \xs@unexpanded{#1}% \xs@unexpanded\expandafter{\expandafter\xs@relax\xs@Inhibit\xs@relax\fi}% }% } / §% The evaluation ends in any case with the restoration of the tests, in case they were inhibited. the remainder is the right part of a discarded phrase. For instance, if xesearch searches for \texttt{page layout} it will investigate \texttt{page properties} if it finds it, and the remainder is \texttt{properties}. § / \def\xs@Restore{% \xs@Matchfalse \let\xs@f@Test\xs@@f@Test \let\xs@F@Test\xs@@F@Test \let\xs@p@Test\xs@@p@Test \let\xs@P@Test\xs@@P@Test \let\xs@s@Test\xs@@s@Test \let\xs@S@Test\xs@@S@Test \let\xs@StartString\xs@@StartString \edef\xs@Remainder{% \xs@unexpanded\expandafter{\xs@NoReplace}% \xs@unexpanded\expandafter{\xs@Remainder}% }% \XeTeXinterchartokenstate=1 } / §% This is used to test phrases minus the last word on each iteration. The stack itself is built when the beginning of a phrase is found before a natural delimiter. 
§ / \def\xs@PopWord#1\xs@end#2\xs@end{% \def\xs@String{#2}% \def\xs@@PopWord#2##1\xs@end{% \edef\xs@Remainder{##1\xs@unexpanded\expandafter{\xs@Remainder}% }% }% \xs@@PopWord#1\xs@end } \def\xs@PopStack#1\xs@end#2\xs@@end{% \def\xs@Stack{#2}% \expandafter\xs@PopWord\xs@String\xs@end#1\xs@end } / §% To search affixes in a given order, we simply define the list to be used in tests to be the one with this order. § / \def\SortByLength#1{% \def\xs@temp{#1}% \ifx\xs@temp\xs@star \def\xs@AffixOrder{@shorter}% \let\xs@next\xs@SortByLength \else \def\xs@AffixOrder{@longer}% \def\xs@next{\xs@@SortByLength#1\xs@end}% \fi \xs@next}% \def\xs@SortByLength#1{% \xs@@SortByLength#1\xs@end } \def\xs@@SortByLength#1{% \ifx#1\xs@end \let\xs@next\relax \else \expandafter\let\csname #1@order\endcsname\xs@AffixOrder \let\xs@next\xs@@SortByLength \fi\xs@next } \def\DoNotSort{% \def\xs@AffixOrder{}% \xs@SortByLength } / §% Searching all affixes is done by setting the \mac{xs@WhatNext} macro to \mac{xs@\ttslant{}@WhatNext}, depending on the test being performed. § / \def\SearchAll#1{% \xs@SearchAll#1\xs@end } \def\xs@SearchAll#1{% \ifx#1\xs@end \let\xs@next\relax \else\let\xs@next\xs@SearchAll \if#1p% \let\xs@p@WhatNext\xs@CheckPrefixes \else \if#1P \let\xs@P@WhatNext\xs@CheckPrefixes \else \if#1s \let\xs@s@WhatNext\xs@CheckSuffixes \else \let\xs@S@WhatNext\xs@CheckSuffixes \fi \fi \fi \fi\xs@next } \def\SearchOnlyOne#1{% \xs@SearchOne#1\xs@end } / §% Searching only one affix is simply gobbling the remaining ones in case of a successful test. § / \def\xs@SearchOne#1{% \ifx#1\xs@end \let\xs@next\relax \else \let\xs@next\xs@SearchOne \expandafter\def\csname xs@#1@WhatNext\endcsname##1\xs@end,{}% \fi\xs@next } / \subsection{Miscellanea} §% For the moment, starting and stopping the search is quite brutal. § / \def\StopSearching{% \let\xs@StartString\relax } \def\StartSearching{% \let\xs@StartString\xs@@StartString } / §% Patching the output is very simple too.
§ / \let\xs@OldOutput\relax \def\PatchOutput{% \ifx\xs@OldOutput\relax \edef\xs@PatchOutput{% \noexpand\def\noexpand\xs@OldOutput{% \the\output }% \noexpand\output{% \noexpand\StopSearching \the\output \noexpand\StartSearching }% }% \expandafter\xs@PatchOutput \else \xs@err{Output already patched}% \fi } \def\NormalOutput{% \ifx\xs@OldOutput\relax \xs@err{Output has not been patched}% \else \expandafter\output\expandafter{% \xs@OldOutput }% \let\xs@OldOutput\relax \fi } / §% As is patching the tracing. § / \def\PatchTracing{% \def\xs@StopTracing{% \chardef\xs@tracingcommands\tracingcommands \chardef\xs@tracingmacros\tracingmacros \tracingcommands0 \tracingmacros0\relax }% \def\xs@StartTracing{% \tracingcommands\xs@tracingcommands \tracingmacros\xs@tracingmacros }% } \def\NormalTracing{% \let\xs@StopTracing\relax \let\xs@StartTracing\relax } \NormalTracing / §% finally we set everything back to normal, set some default values and say goodbye. § / \xs@RestoreCatcodes \catcode`@=12 \SearchOrder{ F!fPpSs; f!PpSs; P!pSs; p!Ss; S!s; s; } \DoNotSort{pPsS} \SearchAll{pPsS} \XeTeXinterchartokenstate1 \endinput / \subsection{A third party file for ConTeXt} \ProduceFile{t-xesearch.tex}[t-xesearch][v.0.1][\the\year.\month.\day] \CodeEscape! § This file is mostly due to Wolfgang Schuster. \mac{xs@contextmodule} is used when the main file is loaded to set the meaning of \mac{xs@unexpanded}. (ConTeXt commands have meaningful names, so I didn't want to rely on them as tests for ConTeXt, because there might exist commands with the same names in other formats.) 
§ \begin{code} %D \module %D [ file=!FileName, %D version=!FileDate, %D title=\CONTEXT\ User Module, %D subtitle=XeSearch, %D author=Paul Isambert, %D date=\currentdate, %D copyright=Paul Isambert, %D email=zappathustra@free.fr, %D license=LaTeX Project Public License] \writestatus{loading}{ConTeXt User Module / XeSearch} \csname xs@contextmodule\endcsname \input xesearch.sty \endinput \end{code} \UndoCodeEscape \StopSearching \framebreak \renewenvironment{theindex}{% \section{Index}}{} \def\item{\par\noindent} \printindex \end{document}