% \iffalse meta-comment
%
% Copyright (C) 2013 by Wolfgang Skala
%
% This work may be distributed and/or modified under the
% conditions of the LaTeX Project Public License, either version 1.3
% of this license or (at your option) any later version.
% The latest version of this license is in
% http://www.latex-project.org/lppl.txt
% and version 1.3 or later is part of all distributions of LaTeX
% version 2005/12/01 or later.
%
% \fi
%
% \iffalse
% NOTE(review): the identification lines below presumably carried docstrip
% guards that were lost from this copy; restore them from upstream.
%\ProvidesPackage{pgfmolbio}[2013/08/01 v0.21 Molecular biology graphs with TikZ]
%\NeedsTeXFormat{LaTeX2e}[1999/12/01]
%module("pgfmolbio", package.seeall)
%\ProvidesFile{pgfmolbio.chromatogram.tex}[2013/08/01 v0.21 SCF chromatograms]
%module("pgfmolbio.chromatogram", package.seeall)
%\ProvidesFile{pgfmolbio.domains.tex}[2013/08/01 v0.21 Protein domains]
%module("pgfmolbio.domains", package.seeall)
%\ProvidesFile{pgfmolbio.convert.tex}[2013/08/01 v0.21 pgfmolbio graph conversion]
%
%<*driver>
% Driver: typesets the manual and the documented source from this very file.
\documentclass[captions=tableheading,cleardoublepage=empty,titlepage=false]{scrreprt}
\setkomafont{title}{\rmfamily\bfseries}
\addtokomafont{sectioning}{\rmfamily}
\usepackage[ngerman,english]{babel}
\usepackage[hdivide={2cm,*,5cm}]{geometry}
\usepackage{fontspec}
\usepackage{array,booktabs,tabularx}
\usepackage[chromatogram,domains]{pgfmolbio}
\usetikzlibrary{patterns,backgrounds,decorations.pathreplacing}
\usepackage{ydoc-doc,ydoc-code,ydoc-desc,ydoc-expl}
\AlsoImplementation
\hypersetup{%
  colorlinks=false,%
  bookmarksnumbered,%
  bookmarksopen,%
  bookmarksopenlevel=1,%
  breaklinks,%
  pdfborder=0 0 0,%
  pdfhighlight=/N,%
}
% Inline code shortcuts: |...| uses the LaTeX listing style, the section
% sign uses the Lua listing style.
\AtBeginDocument{%
  \lstMakeShortInline[style=latex-expl,basicstyle=\ttfamily,numbers=none,firstnumber=last]|%
  \lstMakeShortInline[style=lua-doc,basicstyle=\ttfamily,frame=none,numbers=none]§%
}
\makeatletter
% \DescribeOption[#1]{#2}{#3}{#4}<argument description>
%   #1  text printed after the key name (optional, default: =)
%   #2  key path printed below /pgfmolbio/ in a lighter colour
%   #3  key name
%   #4  default value; stored globally in \OptDefault for \PrintOptions
% The description that follows the four arguments is consumed by
% \read@Macro@arg. \after@Macro@args is redefined to close the box and call
% \PrintOptions (presumably invoked by ydoc once that description is read).
\newcommand\DescribeOption[4][=]{%
  \gdef\OptDefault{\textcolor{black!50}{Default:}~\texttt{#4}}%
  \DescribeMacros
  \let\DescribeMacros\y@egroup
  \optionalon
  \def\after@Macro@args{\y@egroup\PrintOptions\endgroup}%
  \hbox\y@bgroup
  \texttt{\textcolor{opt!50}{/pgfmolbio/#2}\textcolor{opt}{#3}~#1}%
  \ydoc@macrocatcodes
  \macroargsstyle
  \read@Macro@arg%
}
% \PrintOptions typesets the framed description box followed by the default
% value held in \OptDefault. The box is centred on the line when it is wider
% than \linewidth minus twice \descindent, and indented by \descindent
% otherwise. \Needspace* reserves the box height plus two baselines.
\def\PrintOptions{%
  \par\vspace\beforedescskip
  \begingroup
  \sbox\@tempboxa{\descframe{\usebox{\descbox}}}%
  \Needspace*{\dimexpr\ht\@tempboxa+2\baselineskip\relax}%
  \par\noindent
  \ifdim\wd\@tempboxa>\dimexpr\linewidth-2\descindent\relax
    \makebox[\linewidth][c]{\usebox\@tempboxa}%
  \else
    \hspace*{\descindent}%
    \usebox\@tempboxa
  \fi
  \endgroup
  \par\medskip\makebox{\kern10pt\OptDefault}
  \vspace\afterdescskip
  \par\noindent
}
% \PrintExample lays out one example. \@tempdima receives the text width
% minus the widths of the code box and the result box minus a 1cm gap.
% If that is positive, the framed result and the code are set side by side
% (result on the left); if not, they are stacked with the result on top.
\def\PrintExample{%
  \begingroup
  \BoxExample
  \@tempdima\textwidth
  \advance\@tempdima-\wd\examplecodebox\relax
  \advance\@tempdima-\wd\exampleresultbox\relax
  \advance\@tempdima-1cm\relax
  \ifdim\@tempdima>0pt
    % width of the framed result: box width plus frame padding and rule
    \@tempdimb\wd\exampleresultbox
    \advance\@tempdimb2\fboxsep
    \advance\@tempdimb2\fboxrule
    \par\bigskip\noindent%
    \centerline{%
      \parbox[c]{\@tempdimb}{\fbox{\usebox\exampleresultbox}}
      \hskip1cm
      \parbox[c]{\wd\examplecodebox}{\usebox\examplecodebox}
    }%
    \par\bigskip
  \else
    \par\bigskip\noindent%
    \vbox{%
      \centerline{\fbox{\usebox\exampleresultbox}}%
      \vspace{\bigskipamount}%
      \centerline{\usebox\examplecodebox}%
    }%
    \par\bigskip
  \fi
  \endgroup
}
% \DescribeFeature{<feature>}{<alias>} starts a paragraph for a feature and
% prints its alias; the starred form \DescribeFeature*{<feature>} prints
% "no alias" instead.
\newcommand\DescribeFeature{\@ifstar\DescribeFeatureNoAlias\DescribeFeatureAlias}
\newcommand\DescribeFeatureAlias[2]{\paragraph{Feature \texttt{#1}} (\textit{alias} \texttt{#2})\\}
\newcommand\DescribeFeatureNoAlias[1]{\paragraph{Feature \texttt{#1}} (\textit{no alias})\\}
\makeatother
% \ometa{<text>}: meta variable typeset in the style of an optional argument.
\def\ometa#1{{\optional\meta{#1}}}
\lstdefinestyle{exampleextract}{gobble=2}
\lstdefinestyle{examplecode}{style=latex-expl}
\def\ydoclistingssettings{\lstset{style=latex-expl}}
% Colours for option names, module names and macro descriptions.
\definecolor{opt}{named}{OliveGreen}
\definecolor{mod}{named}{black}
\definecolor{macrodesc}{named}{ProcessBlue}
\definecolor{macroimpl}{named}{ProcessBlue}
\newcommand\module[1]{\textsf{\textcolor{mod}{#1}}}
\renewcommand*\optstyle[1]{\texttt{\textcolor{opt}{#1}}}
% Listing style for LaTeX code in examples. Class-1 control sequences are
% general TeX, LaTeX and pgf names; class-2 control sequences are names
% defined by pgfmolbio itself.
\lstdefinestyle{latex-expl}{
  language=[AlLaTeX]TeX,
  columns=fullflexible,
  keepspaces=true,
  tabsize=2,
  numbers=left,
  numberstyle=\sffamily\tiny\color{gray},
  numbersep=5pt,
  firstnumber=auto,
  prebreak={},
  basicstyle=\ttfamily\small,
  texcsstyle=*\color{MidnightBlue},
  texcsstyle=*[2]\color{ProcessBlue},
  keywordstyle=\color{RedOrange},
  commentstyle=\itshape\color{black!50},
  morekeywords={tikzpicture,pgfinterruptboundingbox,pgfinterruptpicture,pmbdomains,scope},
  moretexcs=[1]{
    @empty,@ifundefined,@nameuse,clip,colorlet,definecolor,directlua,draw,ifluatex,
    luaescapestring,node,path,pdfdraftmode,pgf@protocolsizes,pgf@x,pgf@y,pgfdeclareverticalshading,
    pgfkeys,pgfkeysalso,pgfkeysdef,pgfkeyssetvalue,pgfkeysvalueof,pgfmathsetmacro,pgfpoint,pgfqkeys,
    ProcessOptions,RequireLuaModule,textcolor,tikzset,useasboundingbox,usetikzlibrary
  },
  moretexcs=[2]{
    @pmb@chr@getkey,@pmb@chr@keydef,@pmb@chr@stylekeydef,
    @pmb@dom@feature@default@shape,@pmb@dom@helixsegment,@pmb@dom@helixhalfsegment,@pmb@dom@keydef,
    @pmb@toksa,@pmb@toksb,@tempa,adddisulfidefeatures,addfeature,featureSequence,currentResidue,
    pmb@chr@tikzpicturefalse,pmb@chr@tikzpicturetrue,ifpmb@chr@showbasenumbers,ifpmb@chr@tikzpicture,
    ifpmb@con@includedescription,ifpmb@con@outputtikzcode,ifpmb@dom@showname,ifpmb@dom@showruler,
    ifpmb@dom@showsecstructure,ifpmb@dom@tikzpicture,ifpmb@loadmodule@chromatogram,ifpmb@loadmodule@convert,
    ifpmb@loadmodule@domains,inputuniprot,inputgff,
    pgfmolbioset,pmb@con@outputtikzcodefalse,pmb@con@outputtikzcodetrue,
    pmb@dom@addfeature,pmb@dom@inputgff,pmb@dom@inputuniprot,pmb@dom@tikzpicturetrue,pmb@dom@tikzpicturefalse,
    pmb@loadmodule@chromatogramtrue,pmb@loadmodule@converttrue,
    pmb@loadmodule@domainstrue,pmb@magnifiedsequence@width,pmbchromatogram,pmbdomdrawfeature,
    pmbdomvalueof,pmbprotocolsizes,removedisulfidefeatures,removefeatureprintfunction,
    residueNumber,setdisulfidefeatures,setfeaturealias,
    setfeatureprintfunction,setfeatureshape,setfeatureshapealias,setfeaturestyle,setfeaturestylealias,
    turnXradius,turnYradius,xLeft,xLowerLeft,xLowerRight,xMid,xRight,xUpperLeft,xUpperRight,
    yLower,yMid,yShift,yUpper
  },
  escapebegin=\begin{rmfamily}\color{black!50},
  escapeend=\end{rmfamily}
}
% Framed variant of latex-expl with continued line numbering.
\lstdefinestyle{latex-doc}{
  style=latex-expl,
  showlines,
  firstnumber=last,
  breaklines,
  frame=single,
  frameround=tttt,
  rulecolor=\color{black!50}
}
% Listing style for Lua code.
\lstdefinestyle{lua-doc}{
  language=lua,
  columns=fullflexible,
  keepspaces=true,
  tabsize=2,
  basicstyle=\ttfamily\small\color{ForestGreen},
  keywordstyle=\color{MidnightBlue},
  keywordstyle=[2]\color{ProcessBlue},
  stringstyle=\color{Red},
  identifierstyle=\color{Black},
  emphstyle=\color{BurntOrange!50!Black},
  showstringspaces=false,
  numbers=left,
  numberstyle=\sffamily\tiny\color{gray},
  numbersep=5pt,
  firstnumber=last,
  breaklines,
  showlines,
  frame=single,
  frameround=tttt,
  rulecolor=\color{black!50},
  belowskip=\bigskipamount
}
% Lua language definition for listings: class-1 keywords are the reserved
% words, class-2 keywords are library names, and the emph list holds
% identifiers defined by the pgfmolbio Lua modules.
\lstdefinelanguage{lua}{
  morekeywords={and,break,do,else,elseif,end,false,for,function,if,in,local,
    nil,not,or,repeat,return,then,true,until,while},
  morekeywords=[2]{arg,assert,collectgarbage,dofile,error,_G,format,getfenv,
    getmetatable,__index,ipairs,load,loadfile,loadstring,next,pairs,pcall,print,
    rawequal,rawget,rawset,select,self,setfenv,setmetatable,tonumber,tostring,__tostring,
    type,unpack,_VERSION,xpcall},
  morekeywords=[2]{module,require,package,seeall},
  morekeywords=[2]{string,byte,char,dump,find,
    format,gmatch,string,gsub,len,lower,
    match,rep,reverse,sub,trim,upper},
  morekeywords=[2]{table,concat,insert,maxn,remove,sort},
  morekeywords=[2]{math,min},
  morekeywords=[2]{io,input,open,output,close,flush,
    lines,read,seek,setvbuf,write},
  morekeywords=[2]{os.clock,os.date,os.difftime,os.execute,os.exit,os.getenv,
    os.remove,os.rename,os.setlocale,os.time,os.tmpname},
  morekeywords=[2]{tex,sprint},
  emph={addFeature,aliasFeatureStyle,calculateDisulfideLevels,Chromatogram,clearKeys,dimToString,findBasesInStr,
    getBaseKey,getMinMaxProbability,getParameters,getRange,getSampleAndPeakIndex,new,packageError,
    packageWarning,printSequenceFeature,printHelixFeature,printTikzChromatogram,printTikzDomains,Protein,
    readGffFile,readInt,readScfFile,readUniprotFile,selectStyleFromList,setCoordinateFormat,
    setFeatureStyle,setKeys,setParameters,SpecialKeys,stdProbStyle,stringToDim,toAbsoluteResidueNumber},
  sensitive=true,
  alsoletter={0123456789},
  morecomment=[l]{--},
  morecomment=[s]{--[[}{]]--},
  morestring=[b]{"},
  morestring=[d]{'}
}
\def\TikZ{Ti\textit{k}Z}
\usepackage{caption}
\captionsetup{format=plain,indention=1em,labelsep=colon,font={footnotesize,sf},labelfont={bf},skip=0pt}
% Number examples per chapter.
\makeatletter\@addtoreset{example}{chapter}\makeatother
\renewcommand\theexample{\arabic{chapter}.\arabic{example}}
% Declare an RGB transparency group on every page.
% NOTE(review): the damaged source only retained "/Group <>", which is not a
% valid PDF dictionary; the entries below follow the customary idiom and
% should be confirmed against upstream.
\pdfpageattr{/Group <</S /Transparency /I true /CS /DeviceRGB>>}
\begin{document}
  \DocInput{pgfmolbio.dtx}
\end{document}
%</driver>
% \fi
%
%
%
% \CharacterTable
% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
% Digits \0\1\2\3\4\5\6\7\8\9
% Exclamation \! Double quote \" Hash (number) \#
% Dollar \$ Percent \% Ampersand \&
% Acute accent \' Left paren \( Right paren \)
% Asterisk \* Plus \+ Comma \,
% Minus \- Point \. Solidus \/
% Colon \: Semicolon \; Less than \<
% Equals \= Greater than \> Question mark \?
% Commercial at \@ Left bracket \[ Backslash \\ % Right bracket \] Circumflex \^ Underscore \_ % Grave accent \` Left brace \{ Vertical bar \| % Right brace \} Tilde \~} % % % \GetFileInfo{pgfmolbio.sty} % % \CheckSum{1254} % % \pagenumbering{roman} % \title{The \texttt{pgfmolbio} package --\texorpdfstring{\\}{}Molecular Biology Graphs with \TikZ\texorpdfstring{\footnote{This document describes version \fileversion, dated \filedate.}}{}} % \author{\texorpdfstring{Wolfgang Skala\thanks{Division of Structural Biology, Department of Molecular Biology, University of Salzburg, Austria; \texttt{Wolfgang.Skala@stud.sbg.ac.at}}}{Wolfgang Skala}} % \date{\filedate} % \maketitle % % \begin{abstract} % The experimental package \pkg{pgfmolbio} draws graphs typically found in molecular biology texts. Currently, the package contains three modules: \module{chromatogram} creates DNA sequencing chromatograms from files in standard chromatogram format (\file{scf}); \module{domains} draws protein domain diagrams; \module{convert} integrates \pkg{pgfmolbio} with \TeX\ engines that lack Lua support. % \end{abstract} % % \tableofcontents % % % \chapter{Introduction} % \label{cha:Introduction} % \pagenumbering{arabic} % % % \section{About \texorpdfstring{\pkg{pgfmolbio}}{pgfmolbio}} % \label{sec:IntroAbout} % % Over the decades, \TeX\ has gained popularity across a large number of disciplines. Although originally designed as a mere typesetting system, packages such as \pkg{pgf}\footnote{Tantau, T. (2010). The \TikZ\ and \textsc{pgf} packages. \url{http://ctan.org/tex-archive/graphics/pgf/}.} and \pkg{pstricks}\footnote{van Zandt, T., Niepraschk, R., and Voß, H. (2007). PSTricks: PostScript macros for Generic \TeX. \url{http://ctan.org/tex-archive/graphics/pstricks}.} have strongly extended its \textit{drawing} abilities. Thus, one can create complicated charts that perfectly integrate with the text. % % Texts on molecular biology include a range of special graphs, e.\,g. 
multiple sequence alignments, membrane protein topologies, DNA sequencing chromatograms, protein domain diagrams, plasmid maps and others. The \pkg{texshade}\footnote{Beitz, E. (2000). \TeX shade: shading and labeling multiple sequence alignments using \LaTeXe. \textit{Bioinformatics}~\textbf{16}(2), 135--139.\\\url{http://ctan.org/tex-archive/macros/latex/contrib/texshade}.} and \pkg{textopo}\footnote{Beitz, E. (2000). \TeX topo: shaded membrane protein topology plots in \LaTeXe. \textit{Bioinformatics} \textbf{16}(11), 1050--1051.\\\url{http://ctan.org/tex-archive/macros/latex/contrib/textopo}.} packages cover alignments and topologies, respectively, but packages dedicated to the remaining graphs are absent. Admittedly, one may create those images with various external programs and then include them in the \TeX\ document. Nevertheless, purists (like the author of this document) might prefer a \TeX-based approach. % % The \pkg{pgfmolbio} package aims at becoming such a purist solution. In the current development release, \pkg{pgfmolbio} is able to % \begin{itemize} % \item read DNA sequencing files in standard chromatogram format (\file{scf}) and draw the corresponding chromatogram; % \item read protein domain information from Uniprot or general feature format files (\file{gff}) and draw domain diagrams. % \end{itemize} % To this end, \pkg{pgfmolbio} relies on routines from \pkg{pgf}'s \TikZ\ frontend and on the Lua scripting language implemented in Lua\TeX. Consequently, the package will not work directly with traditional engines like pdf\TeX. However, a converter module ensures a high degree of backward compatibility. % % Since this is a development release, \pkg{pgfmolbio} presumably includes a number of bugs, and its commands and features are likely to change in future versions. Moreover, the current version is far from complete, but since time is scarce, I am unable to predict when (and if) additional functions become available. 
Nevertheless, I would greatly appreciate any comments or suggestions. % % % \section{Getting Started} % \label{sec:IntroGettingStarted} % % Before you consider using \pkg{pgfmolbio}, please make sure that both your Lua\TeX\ (at least 0.70.2) and \pkg{pgf} (at least 2.10) installations are up-to-date. Once your \TeX\ system meets these requirements, just load \pkg{pgfmolbio} as usual, i.\,e. by % % \DescribeMacro\usepackage[<module>]{pgfmolbio} % % The package is divided into \textit{modules}, each of which produces a certain type of graph. Currently, three \ometa{module}s are available: % \begin{itemize} % \item \module{chromatogram} (chapter~\ref{cha:Chromatogram}) allows you to draw DNA sequencing chromatograms obtained by the Sanger sequencing method. % \item \module{domains} (chapter~\ref{cha:Domains}) provides macros for drawing protein domain diagrams and is also able to read domain information from files in Uniprot or general feature format. % \item Furthermore, \module{convert} (chapter~\ref{cha:Convert}) is used with one of the modules above and generates ``pure'' \TikZ\ code suitable for \TeX\ engines lacking Lua support. % \end{itemize} % % \DescribeMacro\pgfmolbioset[<module>]{<key-value list>} % Fine-tunes the graphs produced by each \pkg{pgfmolbio} module. The possible keys are described in the sections on the respective modules. % % % % % \chapter{The \texorpdfstring{\module{chromatogram}}{chromatogram} module} % \label{cha:Chromatogram} % % % \section{Overview} % \label{sec:ChrOverview} % % The \module{chromatogram} module draws DNA sequencing chromatograms stored in standard chromatogram format (\file{scf}), which was developed by Simon Dear and Rodger Staden\footnote{Dear, S. and Staden, R. (1992). A standard file format for data from DNA sequencing instruments. \textit{DNA Seq.} \textbf{3}(2), 107--110.}. The documentation for the Staden package\footnote{\url{http://staden.sourceforge.net/}} describes the current version of the \file{scf} format in detail.
As far as they are crucial to understanding the Lua code, we will discuss some details of this file format in the documented source code (section~\ref{sec:DocChrLua}). Note that \pkg{pgfmolbio} only supports \file{scf} version 3.00. % % % \section{Drawing Chromatograms} % \label{sec:ChrDrawingChromatograms} % % \DescribeMacro\pmbchromatogram[<key-value list>]{<scf file>} % The \module{chromatogram} module defines a single command, which reads a chromatogram from an \meta{scf file} and draws it with routines from \TikZ\ (Example~\ref{exa:ChrTikzpicture}). The options, which are set in the \ometa{key-value list}, configure the appearance of the chromatogram. The following sections will elaborate on the available keys. % \begin{exampletable} % \caption{} % \label{exa:ChrTikzpicture} % \begin{examplecode} % \begin{tikzpicture} % optional % \pmbchromatogram{SampleScf.scf} % \end{tikzpicture} % optional % \end{examplecode} % \end{exampletable} % % Although you will often put |\pmbchromatogram| into a |tikzpicture| environment, you may actually use the macro on its own. \pkg{pgfmolbio} checks whether the command is surrounded by a |tikzpicture| and adds this environment if necessary. % % % \section{Displaying Parts of the Chromatogram} % \label{sec:ChrDisplayingParts} % % \DescribeOption{chromatogram/}{sample range}{1-500 step 1}<lower>'-'<upper>[' step '<int>]\relax % \opt{sample range} selects the part of the chromatogram which \pkg{pgfmolbio} should display. The value for this key consists of two or three parts, separated by the keywords |-| and |step|. The package will draw the chromatogram data between the \meta{lower} and \meta{upper} boundary. There are two ways of specifying these limits: % \begin{enumerate} % \item If you enter a number, \pkg{pgfmolbio} includes the data from the \meta{lower} to the \meta{upper} sample point (Example~\ref{exa:ChrLimitsSamplePoints}). A \textit{sample point} represents one measurement of the fluorescence signal along the time axis, where the first sample point has index 1.
One peak comprises about 20 sample points. % \begin{exampletable} % \caption{} % \label{exa:ChrLimitsSamplePoints} % \begin{examplecode} % \pmbchromatogram[sample range=200-600]{SampleScf.scf} % \end{examplecode} % \end{exampletable} % \item If you enter the keyword |base| followed by an optional space and a number, the chromatogram starts or stops at the peak corresponding to the respective base. The first detected base peak has index 1. Compare Examples~\ref{exa:ChrLimitsSamplePoints} and~\ref{exa:ChrLimitsBases} to see the difference. % \end{enumerate} % The optional third part of the value for \opt{sample range} orders the package to draw every \ometa{int}th sample point. If your document contains large chromatograms or a great number of them, drawing fewer sample points decreases typesetting time at the cost of image quality (Example~\ref{exa:ChrSampleStep}). Nevertheless, the key may be especially useful while optimizing the layout of complex chromatograms. % % \begin{exampletable}[p] % \caption{} % \label{exa:ChrLimitsBases} % \begin{examplecode} % \pmbchromatogram[% % sample range=base 50-base60 % ]{SampleScf.scf} % \end{examplecode} % \end{exampletable} % % \begin{exampletable}[p] % \caption{} % \label{exa:ChrSampleStep} % \pgfmolbioset[chromatogram]{canvas height=1cm} % \begin{examplecode} % \pmbchromatogram[% % sample range=base 20-base 50 step 1 % ]{SampleScf.scf} % \end{examplecode} % \begin{examplecode} % \pmbchromatogram[% % sample range=base 20-base 50 step 2 % ]{SampleScf.scf} % \end{examplecode} % \begin{examplecode} % \pmbchromatogram[% % sample range=base 20-base 50 step 4 % ]{SampleScf.scf} % \end{examplecode} % \end{exampletable} % % % \section{General Layout} % \label{sec:ChrGeneralLayout} % % \DescribeOption{chromatogram/}{x unit}{0.2mm}<dimension> % \DescribeOption{chromatogram/}{y unit}{0.01mm}<dimension> % These keys set the horizontal distance between two consecutive sample points and the vertical distance between two fluorescence intensity values,
respectively. Example~\ref{exa:Chrxyunit} illustrates how you can enlarge a chromatogram twofold by doubling these values. % \begin{exampletable} % \caption{} % \label{exa:Chrxyunit} % \begin{examplecode} % \pmbchromatogram[% % sample range=base 50-base 60, % x unit=0.4mm, % y unit=0.02mm % ]{SampleScf.scf} % \end{examplecode} % \end{exampletable} % \DescribeOption{chromatogram/}{samples per line}{500}<number> % \DescribeOption{chromatogram/}{baseline skip}{3cm}<dimension> % A new chromatogram ``line'' starts after \meta{number} sample points, and the baselines of adjacent lines (i.\,e., the $y$-value of fluorescence signals with zero intensity) are separated by \meta{dimension}. In Example~\ref{exa:ChrSamplesPerLine}, you see two lines, each of which contains 250 of the 500 sample points drawn. Furthermore, the baselines are 3.5~cm apart. % \begin{exampletable} % \caption{} % \label{exa:ChrSamplesPerLine} % \begin{examplecode} % \begin{tikzpicture}% % [decoration=brace] % \pmbchromatogram[% % sample range=401-900, % samples per line=250, % baseline skip=3.5cm % ]{SampleScf.scf} % \draw[decorate] % (-0.1cm, -3.5cm) -- (-0.1cm, 0cm) % node[pos=0.5, rotate=90, above=5pt] % {baseline skip}; % \end{tikzpicture} % \end{examplecode} % \end{exampletable} % % \DescribeOption[/.style=]{chromatogram/}{canvas style}{draw=none, fill=none}