#! /usr/bin/env perl
use strict;
use warnings;
use Term::ANSIColor;
use Encode;
use POSIX qw(locale_h);
use locale;
setlocale(LC_CTYPE,"no_NO");

# Version identification strings.
my ($versionnumber,$versiondate)=("2.2","2009 Apr 30");

###### Set CMD specific settings and variables

# Options and states (changed by the command line option parsers)
my ($verbose,$showcodes,$showstates,$showsubcounts)=(0,1,0,0);
my ($htmlstyle,$includeTeX,$briefsum,$utf8flag)=(0,0,0,0);
my @sumweights;   # stays empty unless a -sum option supplies weights

# Global variables
my ($blankline,$errorcount)=(0,0);

# CMD specific global variables
my $totalflag=0;
my @filelist;
my $workdir;
my $globalworkdir="";

###### Set global settings and variables

### Macros for headers
# Macros that identify headers: i.e. following token or
# {...} is counted as header. The =>[2] indicates transition to
# state 2 which is used within headers (although the value is
# actually never used). This is copied to %TeXmacro and the
# only role of defining it here is that the counter for the number
# of headers is incremented by one.
# Every header macro gets its own one-element array [2].
my %TeXheader=map { ($_,[2]) } qw(
  \title \part \chapter
  \section \subsection \subsubsection
  \paragraph \subparagraph
);

### How many tokens to gobble after macro
# Each macro is assumed to gobble up a given number of
# tokens (or {...} groups), as well as options [...] before, within
# and after. The %TeXmacro hash gives a link from a macro
# (or beginNAME for begin-end groups without the backslash)
# to either an integer giving the number of tokens to ignore
# or to an array (specified as [num,num,...]) of length N where
# N is the number of tokens to be read with the macro and the
# array values tell how each is to be interpreted (see the status
# values: 0=ignore, 1=count, etc.). Thus specifying a number N is
# equivalent to specifying an array [0,...,0] of N zeros.
#
# For macros not specified here, the default value is 0: i.e.
# no tokens are excluded, but [...] options are. Header macros
# specified in %TeXheader are automatically included here.
# Macro table: maps a macro name (with its leading backslash), or
# 'beginNAME' for a begin-end group, to the number of following
# tokens/{...} groups to ignore, or to an array giving the status
# to use for each of them. All %TeXheader entries are copied in first.
# Macro tokens always carry their backslash, so every macro key here
# must be written with one ('\renewenvironment', not 'renewenvironment').
my %TeXmacro=(%TeXheader,
  '\documentclass'=>1,'\documentstyle'=>1,'\usepackage'=>1, '\hyphenation'=>1,
  '\pagestyle'=>1,'\thispagestyle'=>1, '\pagenumbering'=>1,'\markboth'=>1, '\markright'=>1,
  '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3],
  '\newenvironment'=>[-3,-3,-3], '\renewenvironment'=>[-3,-3,-3],
  '\newfont'=>2,'\newtheorem'=>2,'\bibliographystyle'=>1, '\bibliography'=>1,
  '\parbox'=>1, '\marginpar'=>[3],'\makebox'=>0, '\raisebox'=>1, '\framebox'=>0,
  '\newsavebox'=>1, '\sbox'=>1, '\savebox'=>2, '\usebox'=>1,'\rule'=>2,
  '\footnote'=>[3],'\label'=>1, '\ref'=>1, '\pageref'=>1, '\bibitem'=>1,
  '\cite'=>1, '\citep'=>1, '\citet'=>1, '\citeauthor'=>1, '\citealt'=>1, '\nocite'=>1,
  '\eqlabel'=>1, '\eqref'=>1,'\hspace'=>1, '\vspace'=>1, '\addvspace'=>1,
  '\input'=>1, '\include'=>1, '\includeonly'=>1,'\includegraphics'=>1,
  '\newlength'=>1, '\setlength'=>2, '\addtolength'=>2,'\settodepth'=>2,
  '\settoheight'=>2, '\settowidth'=>2,'\newcounter'=>1, '\setcounter'=>2,
  '\addtocounter'=>2,'\stepcounter'=>1, '\refstepcounter'=>1, '\usecounter'=>1,
  '\alph'=>1, '\arabic'=>1, '\fnsymbol'=>1, '\roman'=>1, '\value'=>1,
  '\cline'=>1, '\multicolumn'=>3,'\typeout'=>1, '\typein'=>1,
  'beginlist'=>2, 'beginminipage'=>1, 'begintabular'=>1,
  'beginthebibliography'=>1,'beginlrbox'=>1,
  '\begin'=>1,'\end'=>1,'\title'=>[2]);

### Macros that should be counted as one or more words
# Macros that represent text may be declared here. The value gives
# the number of words the macro represents.
my %TeXmacroword=('\LaTeX'=>1,'\TeX'=>1);

### Macros that are counted within the preamble
# The preamble is the text between \documentclass and \begin{document}.
# Text and macros in the preamble is ignored unless specified here. The
# value is the status (1=text, 2=header, etc.) they should be interpreted as.
# Note that only the first unit (token or {...} block) is counted.
# Macros handled inside the preamble. Keys are macro names including
# the leading backslash (the form in which macro tokens are looked up),
# values are the per-parameter status arrays.
my %TeXpreamble=(
  '\title'=>[2],
  '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3],
  '\newenvironment'=>[-3,-3,-3], '\renewenvironment'=>[-3,-3,-3],
  );

### Begin-End groups
# Identified as begin-end groups, and define =>state. The
# states used corresponds to the elements of the count array, and
# are:
#    0: Not included
#    1: Text, word included in text count
#    2: Header, words included in header count
#    3: Float caption, words included in float caption count
#    6: Inline mathematics, words not counted
#    7: Displayed mathematics, words not counted
#   -1: Float, not included, but looks for captions
#
#    4 and 5 are used to count number of headers and floats
#    and are not used as states.
#
# Groups that are not defined will be counted as the surrounding text.
#
# Note that some environments may only exist within math-mode, and
# therefore need not be defined here: in fact, they should not as it
# is not clear if they will be in inlined or displayed math.
#
my %TeXgroup=('document'=>1,'letter'=>1,'titlepage'=>0,
  'center'=>1,'flushleft'=>1,'flushright'=>1,
  'abstract'=>1,'quote'=>1,'quotation'=>1,'verse'=>1,'minipage'=>1,'verbatim'=>1,
  'description'=>1,'enumerate'=>1,'itemize'=>1,'list'=>1,
  'theorem'=>1,'lemma'=>1,'definition'=>1,'corollary'=>1,'example'=>1,
  'math'=>6,'displaymath'=>7,'equation'=>7,'eqnarray'=>7,'align'=>7,
  'figure'=>-1,'float'=>-1,'picture'=>-1,'table'=>-1,
  'tabbing'=>0,'tabular'=>0,'thebibliography'=>0,'lrbox'=>0);

### In floats: include only specific macros
# Macros used to identify caption text within floats.
my %TeXfloatinc=('\caption'=>[3]);

### Macros for including tex files
# Allows \macro{file} or \macro file. If the value is 0, the filename will
# be used as is; if it is 1, the filetype .tex will be added if the
# filename is without filetype; if it is 2, the filetype .tex will be added.
my %TeXfileinclude=(
  '\input'   => 1,
  '\include' => 2,
);

### Count labels
# Labels used to describe the counts
my @countlabel=(
  'Files',
  'Words in text',
  'Words in headers',
  'Words in float captions',
  'Number of headers',
  'Number of floats',
  'Number of math inlines',
  'Number of math displayed',
);

### Break points
# Definition of macros that define break points that start a new subcount.
# The values given are used as labels.
# Each level contains everything from the level before it plus one
# more macro, so the table is built cumulatively.
my %BreakPointsOptions=('none'=>{});
{
  my $inherited='none';
  foreach my $level (
    ['part','\part','Part'],
    ['chapter','\chapter','Chapter'],
    ['section','\section','Section'],
    ['subsection','\subsection','Subsection'],
  ) {
    my ($name,$macro,$label)=@{$level};
    $BreakPointsOptions{$name}={%{$BreakPointsOptions{$inherited}},$macro=>$label};
    $inherited=$name;
  }
}
# 'default' refers to the very same hash as 'subsection'.
$BreakPointsOptions{'default'}=$BreakPointsOptions{'subsection'};
my %BreakPoints=%{$BreakPointsOptions{'none'}};

### Print styles
# Definition of different print styles: maps of class labels
# to ANSI codes. Class labels are as used by HTML styles.
# Style N starts from a copy of style N-1 and adds more classes.
my @STYLES=({'error'=>'bold red'});
my %STYLE;
push @STYLES,{%{$STYLES[0]},
  'word1'=>'blue','word2'=>'bold blue','word3'=>'blue',
  'grouping'=>'red','document'=>'red','mathgroup'=>'magenta',
  'state'=>'cyan underline','sumcount'=>'yellow'};
push @STYLES,{%{$STYLES[1]},
  'command'=>'green','exclcommand'=>'yellow','exclgroup'=>'yellow','exclmath'=>'yellow',
  'ignore'=>'cyan'};
push @STYLES,{%{$STYLES[2]},
  'tc'=>'bold yellow','comment'=>'yellow','option'=>'yellow',
  'fileinclude'=>'bold green'};
push @STYLES,{%{$STYLES[3]}};

### Word regexp pattern list
# List of regexp patterns that should be analysed as words.
# Use @ to represent a letter, will be substituted with $LetterPattern.
my @WordPatterns=('(@+\.)+@+\.?','@+([\-\']@+)*');
my $specialchars='\\\\(ae|AE|o|O|aa|AA)';
my $modifiedchars='\\\\[\'\"\`\~\^\=](\w|\{\w\})';
my $LetterPattern='\w';
my $LetterPatternRelaxed='([\w\-\']|'.$modifiedchars.'|'.$specialchars.'(\{\})?|\{'.$specialchars.'\}|\{\w\})';
my %NamedWordPattern;
$NamedWordPattern{'chinese'}='\p{script=Han}';
$NamedWordPattern{'japanese'}='(\p{script=Han}|\p{script=Hiragana}|\p{script=Katakana})';

### Macro option regexp list
# List of regexp patterns to be gobbled as macro option in and after
# a macro.
my @MacroOptionPatterns=('\[(\w|[,\-\s\~\.\:\;\+\?\*\_\=])*\]');
my @MacroOptionPatternsRelaxed=('\[[^\[\]\n]*\]');

###### Main script

###################################################

MAIN(@ARGV);

###################################################

#########
######### Main routines
#########

# MAIN ROUTINE: Handle arguments, then parse files
# Takes the command line arguments. Options are applied first; if no
# file names remain, the style help is printed or, when that is
# switched off, an error is reported. Otherwise all files are parsed
# and the total is printed.
sub MAIN {
  my @args=@_;
  my @toplevelfiles=Parse_Arguments(@args);
  Apply_Options();
  if (scalar(@toplevelfiles)==0) {
    conditional_print_help_style()
      || print_error("No files specified.","p","error");
  } else {
    conditional_print_help_style();
    my $totalcount=parse_file_list(@toplevelfiles);
    conditional_print_total($totalcount);
  }
  Report_ErrorCount();
  Close_Output();
}

# Checks arguments, exits on exit condition
# With no arguments at all, or with a help/version/license switch as
# the first argument, the matching text is printed and the program
# exits. Returns 1 otherwise.
sub Check_Arguments {
  my @args=@_;
  if (!@args) {
    print_version();
    print_syntax();
    print_reference();
    exit;
  } elsif ($args[0]=~/^(\-?\-(h|\?|help)|\/(\?|h))$/) {
    print_help();
    exit;
  } elsif ($args[0]=~/^\-?\-(ver|version)$/) {
    print_version();
    exit;
  } elsif ($args[0]=~/^\-?\-(lic|license)$/) {
    print_license();
    exit;
  }
  return 1;
}

# Parses arguments, sets options (global) and returns file list
# Works on the argument list it was given rather than on the global
# @ARGV, so the result always reflects what the caller passed in.
# An argument that starts with '-' but is not a known option prints
# a message and the syntax description, then exits. In file names,
# backslashes are converted to forward slashes.
sub Parse_Arguments {
  my @args=@_;
  Check_Arguments(@args);
  my @files;
  foreach my $arg (@args) {
    next if Parse_Option($arg);
    if ($arg=~/^\-/) {
      print 'Invalid option '.$arg."\n";
      print_syntax();
      exit;
    }
    $arg=~s/\\/\//g;
    push @files,$arg;
  }
  return @files;
}

# Parse individual option parameters
sub
Parse_Option {
  # Tries each option family in turn and returns true as soon as one
  # of them recognises (and applies) the argument, false otherwise.
  my $arg=shift @_;
  return parse_options_parsing($arg)
    || parse_options_sums($arg)
    || parse_options_output($arg)
    || parse_options_format($arg)
    ;
}

# Options that control how the TeX source is parsed: file inclusion,
# working directory, character set/language and relaxed patterns.
# Returns 1 if $arg was one of these options, 0 otherwise.
sub parse_options_parsing {
  my $arg=shift @_;
  if ($arg eq '-inc') {$includeTeX=1;}
  elsif ($arg eq '-noinc') {$includeTeX=0;}
  # An undefined work dir makes parse_file derive it from each file's path.
  elsif ($arg eq '-dir') {$globalworkdir=undef;}
  elsif ($arg=~/^-dir=(.*)$/) {$globalworkdir=$1;}
  elsif ($arg=~/^-(utf8|unicode)$/) {$utf8flag=1;}
  elsif ($arg=~/^-(ch|chinese|zhongwen)$/) {
    $utf8flag=1;
    # The language pattern is put first in the word pattern list.
    @WordPatterns=($NamedWordPattern{'chinese'},@WordPatterns);
  }
  elsif ($arg=~/^-(jp|japanese)$/) {
    $utf8flag=1;
    @WordPatterns=($NamedWordPattern{'japanese'},@WordPatterns);
  }
  elsif ($arg eq '-relaxed') {
    @MacroOptionPatterns=@MacroOptionPatternsRelaxed;
    $LetterPattern=$LetterPatternRelaxed;
  }
  else {return 0;}
  return 1;
}

# Options -sum[=weights] and -sub/-subcount(s)[=level].
# Returns 1 if $arg was one of these options, 0 otherwise.
sub parse_options_sums {
  my $arg=shift @_;
  if ($arg=~/^-sum(=(.+))?$/) {option_sum($2);}
  elsif ($arg=~/^-(sub|subcounts?)(=(.+))?$/) {option_subcount($3);}
  else {return 0;}
  return 1;
}

# Switches subcounts on and selects the break point set. Without a
# value, or with a value that is not a key of %BreakPointsOptions
# (a warning is printed in that case), the 'default' set is used.
sub option_subcount {
  my $arg=shift @_;
  $showsubcounts=1;
  if (!defined $arg) {
    %BreakPoints=%{$BreakPointsOptions{'default'}};
  } elsif (my $option=$BreakPointsOptions{$arg}) {
    %BreakPoints=%{$option};
  } else {
    print STDERR "Warning: Option value ".$arg." not valid, using default instead.\n";
    %BreakPoints=%{$BreakPointsOptions{'default'}};
  }
}

# Sets the weights used by total_count. Without a value the weights
# (1,1,1,0,0,1,1) are used; otherwise the value must be one to seven
# comma separated integers. Anything else is reported with a one-line
# warning (same form as the one in option_subcount) and ignored.
sub option_sum {
  my $arg=shift @_;
  if (!defined $arg) {
    @sumweights=(1,1,1,0,0,1,1);
  } elsif ($arg=~/^(\d+(,\d+){0,6})$/) {
    @sumweights=split(',',$1);
  } else {
    print STDERR "Warning: Option value ".$arg." not valid, ignoring option.\n";
  }
}

# Options that select the output format.
# Returns 1 if $arg was one of these options, 0 otherwise.
sub parse_options_format {
  my $arg=shift @_;
  if ($arg eq '-brief') {$briefsum=1;}
  elsif ($arg eq '-total') {$totalflag=1;}
  elsif ($arg eq '-1') {$briefsum=1;$totalflag=1;$verbose=-1;}
  elsif ($arg eq "-html" ) {option_no_colours();$htmlstyle = 2;}
  elsif ($arg eq "-htmlcore" ) {option_no_colours();$htmlstyle = 1;}
  # As written, this accepts exactly -nc and anything starting with -nocol.
  elsif ($arg=~/^\-(nocol|nc$)/) {option_no_colours();}
  elsif ($arg eq '-codes') {
    $showcodes=2;
    if ($verbose==0) {$verbose=3;}
  }
  elsif ($arg eq '-nocodes') {$showcodes=0;}
  else {return 0;}
  return 1;
}

# Options that select verbosity (-1 for quiet, 0 to 3 otherwise) and
# the display of parser states.
# Returns 1 if $arg was one of these options, 0 otherwise.
sub parse_options_output {
  my $arg=shift @_;
  if ($arg eq "-v0") {$verbose=0;}
  elsif ($arg eq "-v1") {$verbose=1;}
  elsif ($arg eq '-vv' || $arg eq '-v2') {$verbose=2;}
  elsif ($arg eq '-vvv' || $arg eq '-v3' || $arg eq '-v') {$verbose=3;}
  elsif ($arg eq '-vvvv' || $arg eq '-v4') {$verbose=3; $showstates=1;}
  elsif ($arg =~ /^\-showstates?$/ ) {$showstates=1;}
  elsif ($arg =~ /^-(q|-?quiet)$/ ) {$verbose=-1;}
  else {return 0;}
  return 1;
}

# Parse file list and return total count
# Every file is parsed with parse_file and its count is added to a
# count object titled "TOTAL COUNT", which is returned.
sub parse_file_list {
  my @filelist=@_;
  my $listtotalcount=new_count("TOTAL COUNT");
  # <@filelist> is Perl's glob operator applied to the interpolated
  # list, so wildcards in the names are expanded here.
  for my $file (<@filelist>) {
    my $filetotalcount=parse_file($file);
    add_count($listtotalcount,$filetotalcount);
  }
  return $listtotalcount;
}

# Parse file and included files, and return total count
# NOTE(review): the remainder of this routine is not intact in this
# copy of the source; only the part shown below is documented.
sub parse_file {
  my $file=shift @_;
  $workdir=$globalworkdir;
  if (!defined $workdir) {
    # No global work dir: use the directory part of the file path.
    $workdir=$file;
    $workdir =~ s/^((.*[\\\/])?)[^\\\/]+$/$1/;
  }
  @filelist=($file);
  if ($htmlstyle) {print "\n
\n";}
  # NOTE(review): the code between the print statement above and the
  # loop below appears to be missing from this copy of the source
  # ($tex is used here without any visible declaration) -- restore it
  # from the original script before relying on this routine.
  # Parse units in text state (1) until the end of the file is reached.
  while (!($tex->{'eof'})) {
    parse_unit($tex,1);
  }
  if ($htmlstyle && $verbose) {print "
\n";}
}

# Parse one block or unit
# Parameters:
#   $tex    - parser object (hash ref) holding the remaining input and
#             the current token
#   $status - parser status, see the list below; must be a defined
#             scalar, otherwise the program exits with a critical error
#   $end    - optional end token; when given, tokens are parsed until
#             this token is found, otherwise exactly one unit is parsed
# If the input runs out while an end token is still awaited, an error
# is reported.
sub parse_unit {
  # Status:
  #    0 = exclude from count
  #    1 = text
  #    2 = header text
  #    3 = float text
  #   -1 = float (exclude)
  #   -2 = strong exclude, ignore begin-end groups
  #   -3 = stronger exclude, do not parse macro parameters
  #   -9 = preamble (between \documentclass and \begin{document})
  my ($tex,$status,$end)=@_;
  if (!defined $status) {
    print_error("CRITICAL ERROR: Undefined parser status!");
    exit;
  } elsif (ref($status) eq 'ARRAY') {
    print_error("CRITICAL ERROR: Invalid parser status!");
    exit;
  }
  my $substat;
  if ($showstates) {
    if (defined $end) {
      $tex->{'printstate'}=':'.$status.':'.$end.':';
    } else {
      $tex->{'printstate'}=':'.$status.':';
    }
    flush_next($tex);
  }
  while (defined (my $next=next_token($tex))) {
    # parse next token; or tokens until match with $end
    set_style($tex,"ignore");
    if ((defined $end) && ($end eq $next)) {
      # end of unit
      return;
    }
    if ($status==-9 && $next eq '\begin' && $tex->{'line'}=~/^\{\s*document\s*\}/) {
      # \begin{document}
      $status=1;
    }
    if ($next eq '\documentclass') {
      # starts preamble
      set_style($tex,'document');
      gobble_option($tex);
      gobble_macro_parms($tex,1);
      while (!($tex->{'eof'})) {
        parse_unit($tex,-9);
      }
    } elsif ($tex->{'type'}==666) {
      # parse TC instructions
      parse_tc($tex);
    } elsif ($tex->{'type'}==1) {
      # word
      if ($status>0) {
        inc_count($tex,$status);
        set_style($tex,'word'.$status);
      }
    } elsif ($next eq '{') {
      # {...}
      parse_unit($tex,$status,'}');
    } elsif ($tex->{'type'}==3 && $status==-3) {
      set_style($tex,'ignore');
    } elsif ($tex->{'type'}==3) {
      # macro call
      parse_macro($tex,$next,$status,$substat);
    } elsif ($next eq '$') {
      # math inline
      parse_math($tex,$status,6,'$');
    } elsif ($next eq '$$') {
      # math display (unless already in inlined math)
      if (!(defined $end && $end eq '$')) {
        parse_math($tex,$status,7,'$$');
      }
    }
    if (!defined $end) {return;}
  }
  # The loop only ends here when next_token returned undef, i.e. the
  # input is exhausted. This check cannot live inside the loop, since
  # the loop body never runs with an undefined token.
  if (defined $end) {
    print_error("ERROR: End of file while waiting for ".$end);
  }
  return;
}

sub parse_macro {
  my ($tex,$next,$status,$substat)=@_;
  if (my
$label=$BreakPoints{$next}) {
    # Break point macro: start a new subcount, labelled with the
    # macro's first {...} argument when that is found on this line.
    if ($tex->{'line'}=~ /^[*]?(\s*\[.*?\])*\s*\{(.+?)\}/ ) {
      $label=$label.': '.$2;
    }
    add_subcount($tex,$label);
  }
  set_style($tex,$status>0?'command':'exclcommand');
  # The order of the branches below matters: the first match wins.
  if ($next eq '\begin' && $status!=-2) {
    parse_begin_end($tex,$status);
  } elsif (($status==-1) && ($substat=$TeXfloatinc{$next})) {
    # text included from float
    set_style($tex,'command');
    gobble_macro_parms($tex,$substat);
  } elsif ($status==-9 && defined ($substat=$TeXpreamble{$next})) {
    # parse preamble include macros
    set_style($tex,'command');
    if (defined $TeXheader{$next}) {inc_count($tex,4);}
    gobble_macro_parms($tex,$substat,1);
  } elsif ($status<0) {
    # ignore
    gobble_option($tex);
  } elsif ($next eq '\(') {
    # math inline
    parse_math($tex,$status,6,'\)');
  } elsif ($next eq '\[') {
    # math display
    parse_math($tex,$status,7,'\]');
  } elsif ($next eq '\def') {
    # ignore \def...
    $tex->{'line'} =~ s/^([^\{]*)\{/\{/;
    flush_next($tex);
    print_style($1.' ','ignore');
    parse_unit($tex,-2);
  } elsif (defined (my $addsuffix=$TeXfileinclude{$next})) {
    # include file: queue up for parsing
    parse_include_file($tex,$status,$addsuffix);
  } elsif (defined ($substat=$TeXmacro{$next})) {
    # macro: exclude options
    if (defined $TeXheader{$next}) {inc_count($tex,4);}
    gobble_macro_parms($tex,$substat,$status);
  } elsif (defined ($substat=$TeXmacroword{$next})) {
    # count macro as word (or a given number of words)
    # Only in a counting state: status 0 (excluded) would otherwise
    # add to element 0 of the count array, which is the file counter.
    if ($status>0) {
      inc_count($tex,$status,$substat);
      set_style($tex,'word'.$status);
    }
  } elsif ($next =~ /^\\[^\w\_]/) {
    # backslash followed by a non-word character: nothing to gobble
  } else {
    gobble_option($tex);
  }
}

# Handles a %TC: instruction held in the current token.
# The instruction name is split off first; 'break' starts a new
# subcount, and instructions of the form "name macro parameter
# [option]" are decoded in the branch that starts below.
sub parse_tc {
  my ($tex)=@_;
  my $next=$tex->{'next'};
  set_style($tex,'tc');
  flush_next($tex);
  if (!($next=~s/^\%+TC:\s*(\w+)\s*// )) {
    print_error('Warning: TC command should have format %TC:instruction [macro] [parameters]');
    return;
  };
  my $instr=$1;
  if ($instr=~/^(break)$/) {
    if ($instr eq 'break') {add_subcount($tex,$next);}
  } elsif ($next=~/^([\\]*\w+)\s+([^\s\n]+)(\s+([0-9]+))?/) {
    # Format = TC:word macro
    my $macro=$1;
    my $param=$2;
    my $option=$4;
    if
($param=~/^\[([0-9,]+)\]$/) {$param=[split(',',$1)];}
    # A parameter written as [n,n,...] has just been turned into an
    # array ref; any other parameter is stored as the plain string.
    # Each instruction stores the parameter in the matching table.
    if (($instr eq 'macro') || ($instr eq 'exclude')) {$TeXmacro{$macro}=$param;}
    elsif ($instr eq 'header') {$TeXheader{$macro}=$param;$TeXmacro{$macro}=$param;}
    elsif ($instr eq 'macroword') {$TeXmacroword{$macro}=$param;}
    elsif ($instr eq 'preambleinclude') {$TeXpreamble{$macro}=$param;}
    elsif ($instr eq 'group') {
      # Parameter rule goes under the 'beginNAME' key; the optional
      # trailing number becomes the group state (undefined if absent).
      $TeXmacro{'begin'.$macro}=$param;
      $TeXgroup{$macro}=$option;
    }
    elsif ($instr eq 'floatinclude') {$TeXfloatinc{$macro}=$param;}
    elsif ($instr eq 'fileinclude') {$TeXfileinclude{$macro}=$param;}
    elsif ($instr eq 'breakmacro') {$BreakPoints{$macro}=$param;}
    else {print_error("Warning: Unknown TC command: ".$instr);}
  } elsif ($instr eq 'ignore') {
    tc_ignore_input($tex);
  } else {
    print_error("Warning: Invalid TC command format: ".$instr);
  }
}

# Skips input up to the "%TC:endignore" token by parsing it with
# status -3, then restores the 'tc' style for the closing instruction.
sub tc_ignore_input {
  my ($tex)=@_;
  set_style($tex,'ignore');
  parse_unit($tex,-3,"%TC:endignore");
  set_style($tex,'tc');
  flush_next($tex);
}

# Parses a math group up to the token $end.
#   $status  - status of the surrounding text
#   $substat - count index to increment (callers pass 6 for inline,
#              7 for displayed math); only counted when $status>0
# The contents are parsed with status 0, i.e. not counted.
sub parse_math {
  my ($tex,$status,$substat,$end)=@_;
  my $localstyle=$status>0 ? 'mathgroup' : 'exclmath';
  if ($status>0) {inc_count($tex,$substat);}
  set_style($tex,$localstyle);
  parse_unit($tex,0,$end);
  set_style($tex,$localstyle);
}

# Parses a \begin{NAME} ... \end{NAME} group; called when the
# current token is \begin.
sub parse_begin_end {
  my ($tex,$status)=@_;
  my $localstyle=$status>0 ?
'grouping' : 'exclgroup';
  flush_style($tex,$localstyle);
  gobble_option($tex);
  my $groupname;
  if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\*?\}[ \t\r\f]*//) {
    # gobble group type
    $groupname=$1;
    print_style('{'.$1.'}',$localstyle);
    my $next='begin'.$1;
    if (defined (my $substat=$TeXmacro{$next})) {
      gobble_macro_parms($tex,$substat);
    }
  } else {
    print_error("Warning: BEGIN group without type.");
  }
  # find group status (or leave unchanged)
  # The lookup uses the saved group name; when no name was found the
  # surrounding status is kept instead of consulting a stale capture.
  my $substat;
  if (!(defined $groupname && defined ($substat=$TeXgroup{$groupname}))) {
    $substat=$status;
  }
  if ($status<=0 && $status<$substat) {$substat=$status;}
  if (($status>0) && ($substat==-1)) {
    # Count float
    inc_count($tex,5);
  }
  if ($status>0 and $substat>3) {
    # count item, exclude contents
    inc_count($tex,$substat);
    $substat=0;
  }
  parse_unit($tex,$substat,'\end');
  if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\}[ \t\r\f]*//) {
    # gobble group type
    flush_style($tex,$localstyle);
    print_style('{'.$1.'}',$localstyle);
  } else {
    print_error("Warning: END group without type while waiting to end "
      .(defined $groupname ? $groupname : '').".");
  }
}

# Handles the file name argument of a file include macro, given as
# {file} or as a bare word. Returns without doing anything if neither
# form is found.
#   $status    - the file is only considered when $status>0
#   $addsuffix - 2: always append '.tex'; 1: append '.tex' only when
#                the name contains no dot; otherwise use the name as is
# The file is queued with add_file_to_list only if $includeTeX is set.
sub parse_include_file {
  my ($tex,$status,$addsuffix)=@_;
  $tex->{'line'} =~ s/^\{([^\{\}\s]+)\}//
    || $tex->{'line'} =~ s/^\s*([^\{\}\%\\\s]+)//
    || return;
  flush_next($tex);
  if ($status>0) {
    print_style($&,'fileinclude');
    my $fname=$1;
    if ($addsuffix==2) {$fname.='.tex';}
    elsif ($addsuffix==1 && ($fname=~/^[^\.]+$/)) {$fname.='.tex';}
    if ($includeTeX) {add_file_to_list($fname);}
  } else {
    # NOTE(review): the style tables define 'ignore', not 'ignored' --
    # confirm which one is intended here.
    print_style($&,'ignored');
  }
}

# Removes one macro option (e.g. [...]) from the start of the
# remaining line, trying each pattern in @MacroOptionPatterns.
# Returns the removed text, or undef if no pattern matched.
sub gobble_option {
  my $tex=shift @_;
  flush_next($tex);
  foreach my $pattern (@MacroOptionPatterns) {
    if ($tex->{'line'}=~s/^($pattern)//) {
      print_style($1,'option');
      return $1;
    }
  }
  return undef;
}

# Removes all consecutive macro options.
sub gobble_options {
  while (gobble_option(@_)) {}
}

# Removes a '*' modifier directly following a macro name.
# Returns the removed '*', or undef if there was none. The star is
# captured explicitly so that it is the text that gets printed and
# returned.
sub gobble_macro_modifier {
  my $tex=shift @_;
  flush_next($tex);
  if ($tex->{'line'} =~ s/^(\*)//) {
    print_style($1,'option');
    return $1;
  }
  return undef;
}

# Parses the parameters of a macro.
#   $parm    - either an array ref with one status per parameter, or
#              a number N meaning N parameters that are all ignored
#   $oldstat - status of the surrounding text (optional)
sub gobble_macro_parms {
  my ($tex,$parm,$oldstat)=@_;
  my $i;
  if (ref($parm) eq 'ARRAY') {
    $i=scalar @{$parm};
  } else {
    $i=$parm;
    # One zero per parameter. Nine are always provided, as before; a
    # larger integer count gets as many as it needs, so that no
    # parameter ends up with an undefined status.
    my $slots=($i=~/^\d+$/ && $i>9) ? $i : 9;
    $parm=[(0) x $slots];
  }
  if
($i>0) {gobble_macro_modifier($tex);}
  gobble_options($tex);
  # Parse each parameter with its own status; options may follow any
  # of them.
  for (my $j=0;$j<$i;$j++) {
    parse_unit($tex,new_status($parm->[$j],$oldstat));
    gobble_options($tex);
  }
}

# Combines the status requested for a macro parameter ($substat) with
# the status of the surrounding text ($old). Without a surrounding
# status the requested one is returned. Otherwise, if either of them
# is -3, -2, 0 or -9 (checked in that order) that value is returned,
# and failing that the larger of the two.
sub new_status {
  my ($substat,$old)=@_;
  if (!defined $old) {return $substat;}
  if ($old==-3 || $substat==-3) {return -3;}
  if ($old==-2 || $substat==-2) {return -2;}
  if ($old==0 || $substat==0) {return 0;}
  if ($old==-9 || $substat==-9) {return -9;}
  if ($old>$substat) {return $old;}
  return $substat;
}

# Prints the previous token (if any) in its style, then returns the
# next token that is neither a comment nor a line break. Comments are
# printed as they are skipped. Returns undef at the end of the input.
sub next_token {
  my $tex=shift @_;
  my ($next,$type);
  if (defined $tex->{'next'}) {print_style($tex->{'next'}.' ',$tex->{'style'});}
  $tex->{'style'}=undef;
  while (defined ($next=get_next_token($tex))) {
    $type=$tex->{'type'};
    if ($type==0) {
      print_style($next,'comment');
    } elsif ($type==9) {
      if ($verbose>0) {line_return(1,$tex);}
    } else {
      return $next;
    }
  }
  return $next;
}

# Reads the next token of any category from the input. The patterns
# are tried in a fixed order and the first one that matches wins.
sub get_next_token {
  # Token (or token group) category:
  #   0: comment
  #   1: word (or other forms of text or text components)
  #   2: symbol (not word, e.g. punctuation)
  #   3: macro
  #   4: curly braces {}
  #   5: brackets []
  #   6: maths
  #   9: line break in file
  #   999: end of line or blank line
  #   666: TeXcount instruction (%TC:instruction)
  my $tex=shift @_;
  my $next;
  (defined ($next=get_token($tex,'\%+TC:\s*endignore\b[^\r\n]*',666))) && return "%TC:endignore";
  (defined ($next=get_token($tex,'\%+TC:[^\r\n]*',666))) && return $next;
  (defined ($next=get_token($tex,'\%[^\r\n]*',0))) && return $next;
  # \r\n is listed first: behind a bare \r alternative it could never
  # match, and a CR LF pair would be read as two line breaks.
  (defined ($next=get_token($tex,'(\r\n|\r|\n)',9))) && return $next;
  (defined ($next=get_token($tex,'\\\\[\{\}]',2))) && return $next;
  foreach my $pattern (@WordPatterns) {
    (defined ($next=get_token($tex,$pattern,1))) && return $next;
  }
  (defined ($next=get_token($tex,'[\"\'\`:\.,\(\)\[\]!\+\-\*=/\^\_\@\<\>\~\#\&]',2))) && return $next;
  (defined ($next=get_token($tex,'\\\\([a-zA-Z_]+|[^a-zA-Z_])',3))) && return $next;
  (defined ($next=get_token($tex,'[\{\}]',4))) && return $next;
  (defined ($next=get_token($tex,'[\[\]]',5))) && return $next;
  (defined
($next=get_token($tex,'\$\$',6))) && return $next;
  (defined ($next=get_token($tex,'\$',6))) && return $next;
  (defined ($next=get_token($tex,'.',999))) && return $next;
  (defined ($next=get_token($tex,'[^\s]+',999))) && return $next;
  # Nothing matched: flag the end of the input.
  $tex->{'eof'}=1;
  return undef;
}

# Tries to take one token matching $regexp off the front of the
# remaining line, together with any blanks that follow it. On success
# the token text and $type are stored in the parser object and the
# token is returned; otherwise undef is returned.
sub get_token {
  my ($tex,$regexp,$type)=@_;
  defined $regexp
    or print_error("ERROR in get_token: undefined regex.");
  defined $tex->{'line'}
    or print_error("ERROR in get_token: undefined tex-line. ".$tex->{'next'});
  return undef unless $tex->{'line'} =~ s/^($regexp)[ \t\r\f]*//;
  $tex->{'next'}=$1;
  $tex->{'type'}=$type;
  return $1;
}

###### Count handling routines

# Creates a count object: a hash ref holding a title and an array of
# eight counters, all starting at zero. The counters are, in order:
# files, text words, header words, float words,
# headers, floats, math-inline, math-display.
sub new_count {
  my ($title)=@_;
  my %fresh=(
    'count'=>[(0) x 8],
    'title'=>$title,
  );
  return \%fresh;
}

# Adds $value (1 when not given) to counter number $type of the count
# object attached to the parser object $tex.
sub inc_count {
  my ($tex,$type,$value)=@_;
  $value=1 unless defined $value;
  my $counter=$tex->{'count'};
  $counter->{'count'}[$type]+=$value;
}

# Returns counter number $type of a count object.
sub get_count {
  my ($count,$type)=@_;
  return $count->{'count'}[$type];
}

# Returns the weighted sum of the counters: weight number i applies
# to counter number i+1, so the file counter is never included.
sub total_count {
  my ($count)=@_;
  my $total=0;
  foreach my $idx (reverse 0..$#sumweights) {
    $total+=get_count($count,$idx+1)*$sumweights[$idx];
  }
  return $total;
}

# Prints a count in the form selected by the options: the weighted
# total (brief output with sum weights), the brief form, or the
# detailed form.
sub print_count {
  my ($count,$header)=@_;
  if ($briefsum && @sumweights) {
    print_count_total($count,$header);
  } elsif ($briefsum) {
    if ($htmlstyle) {print "";}
    print_count_brief($count,$header);
    if ($htmlstyle) {print "
\n";}
  } else {
    print_count_details($count,$header);
  }
}

# Resolves the header to print with a count: the header passed in,
# else the title of the count object, else the empty string.
# Returns the list ($count,$header).
sub print_count_with_header {
  my ($count,$header)=@_;
  if (!defined $header) {$header=$count->{'title'};}
  if (!defined $header) {$header="";}
  return $count,$header;
}

# Prints the weighted total of a count together with its header.
# NOTE(review): the string literals used for HTML output look
# incomplete in this copy of the source -- compare with the original.
sub print_count_total {
  my ($count,$header)=print_count_with_header(@_);
  if ($htmlstyle) {print "".$header;}
  print total_count($count);
  if ($htmlstyle) {print "
\n";}
  else {print ": ".$header;}
}

# Prints a count on one line in the form
#   text+headers+captions (headers/floats/inline math/displayed math) header
sub print_count_brief {
  my ($count,$header)=print_count_with_header(@_);
  my $cnt=$count->{'count'};
  print ${$cnt}[1]."+".${$cnt}[2]."+".${$cnt}[3].
      " (".${$cnt}[4]."/".${$cnt}[5]."/".${$cnt}[6]."/".${$cnt}[7].") ".
      $header;
}

# NOTE(review): as it stands, the body below prints the legend of
# output style codes and never uses $count or $header. Part of the
# source seems to be missing between the first statements and the
# help_style_line calls -- verify against the original script.
sub print_count_details {
  my ($count,$header)=print_count_with_header(@_);
  if ($htmlstyle) {print "";}
  # One legend line per style class: sample text, class, explanation.
  help_style_line('Text which is counted',"word1","counted as text words");
  help_style_line('Header and title text',"word2","counted as header words");
  help_style_line('Caption text and footnotes',"word3","counted as caption words");
  help_style_line("Ignored text or code","ignore","excluded or ignored");
  help_style_line('\documentclass',"document","document start, beginning of preamble");
  help_style_line('\macro',"command","macro not counted, but parameters may be");
  help_style_line('\macro',"exclcommand","macro in excluded region");
  help_style_line("[Macro options]","option","not counted");
  help_style_line('\begin{group} \end{group}',"grouping","begin/end group");
  help_style_line('\begin{group} \end{group}',"exclgroup","begin/end group in excluded region");
  help_style_line('$ $',"mathgroup","counted as one equation");
  help_style_line('$ $',"exclmath","equation in excluded region");
  help_style_line('% Comments',"comment","not counted");
  help_style_line('%TC:TeXcount instructions',"tc","not counted");
  help_style_line("File to include","fileinclude","not counted but file may be counted later");
  if ($showstates) {
    help_style_line('[state]',"state","internal TeXcount state");
  }
  if (@sumweights) {
    help_style_line('[sumcount]',"sumcount","cumulative sum count");
  }
  help_style_line("ERROR","error","TeXcount error message");
  if ($htmlstyle) {print "
";}
  print "\n\n";
}

# Prints one legend line: $text in style $style, followed by the
# explanation $comment and a line break. The explanation is only
# printed when print_style returns a true value.
sub help_style_line {
  my ($text,$style,$comment)=@_;
  # NOTE(review): both branches are identical in this copy of the
  # source; the HTML variant may have lost its markup.
  if ($htmlstyle) {
    $comment=" .... ".$comment;
  } else {
    $comment=" .... ".$comment;
  }
  if (print_style($text,$style)) {
    print $comment;
    linebreak();
  }
}

# Print output style codes if conditions are met
# Returns the value of $showcodes, so callers can tell whether the
# legend was printed.
sub conditional_print_help_style {
  if ($showcodes) {print_help_style();}
  return $showcodes;
}

###### HTML routines

# NOTE(review): this routine continues beyond the part of the file
# shown here, and its string literals look incomplete.
sub html_head {
  print "\n";
  if ($utf8flag) {
    print "\n";
  }
  print '