diff options
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/autostart | 3 | ||||
-rwxr-xr-x[-rw-r--r--] | bin/backup-profiles | 53 | ||||
-rwxr-xr-x | bin/backup-settings | 19 | ||||
-rwxr-xr-x | bin/flac2mp3 | 19 | ||||
-rwxr-xr-x | bin/lowercase | 23 | ||||
-rwxr-xr-x | bin/nvsetup | 10 | ||||
-rwxr-xr-x | bin/rmcache | 31 | ||||
-rwxr-xr-x | bin/tex2pdf | 24 | ||||
-rwxr-xr-x | bin/texcount.pl | 1391 |
9 files changed, 1565 insertions, 8 deletions
diff --git a/bin/autostart b/bin/autostart index 1fac332..8280ce3 100755 --- a/bin/autostart +++ b/bin/autostart @@ -1,4 +1,7 @@ #!/bin/bash +xset -dpms +xset s 3600 3600 +xset s off # make RT check and don't start! KERNEL_NAME=$(uname -r) if [[ "$KERNEL_NAME" != *rt* ]]; then diff --git a/bin/backup-profiles b/bin/backup-profiles index 2f87701..66de7af 100644..100755 --- a/bin/backup-profiles +++ b/bin/backup-profiles @@ -1,10 +1,47 @@ -#!/bin/bash +#!/bin/sh -cd /tmp -tar cvfz firefox.tgz $1/.mozilla/firefox/david.runge-backup -gpg -e -r 'David Runge <david.runge@frqrec.com>' firefox.tgz -mv -f firefox.tgz.gpg $1/Dropbox/sync +firefoxPID=`ps -C firefox -o pid=` +thunderbirdPID=`ps -C thunderbird -o pid=` + +firefoxTMP=/tmp/firefox.tgz +thunderbirdTMP=/tmp/thunderbird.tgz +firefoxIF=~/.mozilla/firefox/david.runge +thunderbirdIF=~/.thunderbird/dvzrv +OF=~/Dropbox/sync +gpgEND=.gpg + + +if [[ -z "$firefoxPID" ]] +then + echo "Firefox is not running." + echo "Backing up profile." + tar cfPzh $firefoxTMP $firefoxIF + echo "Encrypting profile." + gpg -e -r 'David Runge <david.runge@frqrec.com>' $firefoxTMP + echo "Moving backup to Dropbox." + mv -f $firefoxTMP$gpgEND $OF + echo "Cleaning up." + rm $firefoxTMP + echo "Done." +else + echo "Firefox is still running." + echo "Skipping backup." +fi + +if [[ -z "$thunderbirdPID" ]] +then + echo "Thunderbird is not running." + echo "Backing up profile." + tar cfPz $thunderbirdTMP $thunderbirdIF + echo "Encrypting profile." + gpg -e -r 'David Runge <david.runge@frqrec.com>' $thunderbirdTMP + echo "Moving backup to Dropbox." + mv -f $thunderbirdTMP$gpgEND $OF + echo "Cleaning up." + rm $thunderbirdTMP + echo "Done." +else + echo "Thunderbird is still running." + echo "Skipping backup." +fi -tar cvfz thunderbird.tgz $1/.thunderbird/david.runge -gpg -e -r 'David Runge <david.runge@frqrec.com>' thunderbird.tgz -mv -f thunderbird.tgz.gpg $1/Dropbox/sync diff --git a/bin/backup-settings b/bin/backup-settings new file mode 100755 index 0000000..955a59e --- /dev/null +++ b/bin/backup-settings @@ -0,0 +1,19 @@ +#! /bin/sh + +etcTMP=/tmp/etc.tgz +etcIF=/etc +OF=/home/dave/Dropbox/sync +gpgEND=.gpg + +echo "Backing up /etc directory." +tar cfPz $etcTMP $etcIF +echo "Encrypting compressed directory." +su - dave -c "gpg -e -r 'David Runge <david.runge@frqrec.com>' $etcTMP" +echo "Changing permissions on file." +chown dave:dave $etcTMP$gpgEND +echo "Moving backup to Dropbox." +mv -f $etcTMP$gpgEND $OF +echo "Cleaning up." +rm $etcTMP +echo "Done." + diff --git a/bin/flac2mp3 b/bin/flac2mp3 new file mode 100755 index 0000000..9dd970b --- /dev/null +++ b/bin/flac2mp3 @@ -0,0 +1,19 @@ +find -name *.flac -print0 | while read -d $'\0' a + + +do +OUTF=`echo "$a" | sed s/\.flac$/.mp3/g` + +ARTIST=`metaflac "$a" --show-tag=ARTIST | sed s/.*=//g` +TITLE=`metaflac "$a" --show-tag=TITLE | sed s/.*=//g` +ALBUM=`metaflac "$a" --show-tag=ALBUM | sed s/.*=//g` +GENRE=`metaflac "$a" --show-tag=GENRE | sed s/.*=//g` +TRACKNUMBER=`metaflac "$a" --show-tag=TRACKNUMBER | sed s/.*=//g` +DATE=`metaflac "$a" --show-tag=DATE | sed s/.*=//g` + +flac -c -d "$a" | lame -m j -q 0 --vbr-new -V 0 -s 44.1 - "$OUTF" +id3 -t "$TITLE" -T "${TRACKNUMBER:-0}" -a "$ARTIST" -A "$ALBUM" -y "$DATE" -g "${GENRE:-12}" "$OUTF" + +done + + diff --git a/bin/lowercase b/bin/lowercase new file mode 100755 index 0000000..09273b0 --- /dev/null +++ b/bin/lowercase @@ -0,0 +1,23 @@ +#!/bin/bash + +# lowerext.sh + +while read f; do + if [[ "$f" = *.* ]]; then + # Extract the basename + b="${f%.*}" + + # Extract the extension + x="${f##*.}" + + # Convert the extension to lower case + # Note: this only works in recent versions of Bash + l="${x,,}" + + if [[ "$x" != "$l" ]]; then + mv "$f" "$b.$l" + fi + else + continue + fi +done diff --git a/bin/nvsetup b/bin/nvsetup new file mode 100755 index 0000000..23adeb5 --- /dev/null +++ b/bin/nvsetup @@ -0,0 +1,10 @@ +#!/bin/bash + +SCREENCONNECTED=$(xrandr|grep DP-1) +if [[ -n "$(lsmod|grep nouveau)" ]]; then + if [[ "$SCREENCONNECTED" == *connected* && "$SCREENCONNECTED" != *disconnected* ]]; then + echo "Setup DP-1" +# sleep 5 + xrandr --output DP-1 --auto --primary --output LVDS-1 --auto --right-of DP-1 + fi +fi diff --git a/bin/rmcache b/bin/rmcache new file mode 100755 index 0000000..463ec10 --- /dev/null +++ b/bin/rmcache @@ -0,0 +1,31 @@ +#!/bin/bash + +USERDIR=$1 + +echo "rmcache cleaning up: $USERDIR" + +caches=".cache/chromium" +## take care of the whitespace! +caches+=" .config/chromium/Default/*Index*" +caches+=" .thumbnails" +caches+=" .opera/cache4" +caches+=" .opera/cache" +caches+=" .java/deployment/cache" +caches+=" .local/share/Trash" +caches+=" .gnome2/epiphany/mozilla/epiphany/Cache" +caches+=" .adobe/Acrobat/*/Cache" +caches+=" .adobe/Acrobat/*/Temp" +caches+=" .macromedia/Flash_Player/*" +caches+=" .adobe/Flash_Player/AssetCache" +caches+=" .java/deployment/cache" + +pushd $USERDIR > /dev/null + +for cache in $caches +do + echo "removing $cache" + rm -Rf "$cache" + [ $? -eq 0 ] || exit 1 +done + +popd > /dev/null diff --git a/bin/tex2pdf b/bin/tex2pdf new file mode 100755 index 0000000..ad53029 --- /dev/null +++ b/bin/tex2pdf @@ -0,0 +1,24 @@ +#!/bin/bash +echo "tex2pdf called with $1.tex">~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +echo "################## pdflatex, run #1 #################">>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log +echo "bibtex:">>~/.tex2pdf.log +bibtex $1.aux>>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +echo "################## pdflatex, run #2 #################">>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +echo "################## pdflatex, run #3 #################">>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log +echo "#####################################################">>~/.tex2pdf.log +echo "Launching evince with $1.pdf.">>~/.tex2pdf.log +evince $1.pdf & +echo "#####################################################">>~/.tex2pdf.log +echo "Removing aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc.">>~/.tex2pdf.log +echo "Current directory: $(pwd) and children of depth 1.">>~/.tex2pdf.log +rm -f $1.{aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc} +rm -f */*.{aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc} diff --git a/bin/texcount.pl b/bin/texcount.pl new file mode 100755 index 0000000..1f47025 --- /dev/null +++ b/bin/texcount.pl @@ -0,0 +1,1391 @@ +#! /usr/bin/env perl +use strict; +use warnings; +use Term::ANSIColor; +use Encode; +use POSIX qw(locale_h); +use locale; +setlocale(LC_CTYPE,"no_NO"); + +my $versionnumber="2.2"; +my $versiondate="2009 Apr 30"; + +###### Set CMD specific settings and variables + +# Options and states +my $verbose=0; +my $showcodes=1; +my $showstates=0; +my $showsubcounts=0; +my $htmlstyle=0; +my $includeTeX=0; +my $briefsum=0; +my @sumweights; +my $utf8flag=0; + +# Global variables +my $blankline=0; +my $errorcount=0; + +# CMD specific global variables +my $totalflag=0; +my @filelist; +my $workdir; +my $globalworkdir=""; + +###### Set global settings and variables + +### Macros for headers +# Macros that identify headers: i.e. following token or +# {...} is counted as header. The =>[2] indicates transition to +# state 2 which is used within headers (although the value is +# actually never used). This is copied to %TeXmacro and the +# only role of defining it here is that the counter for the number +# of headers is incremented by one. +my %TeXheader=('\title'=>[2],'\part'=>[2],'\chapter'=>[2], + '\section'=>[2],'\subsection'=>[2],'\subsubsection'=>[2], + '\paragraph'=>[2],'\subparagraph'=>[2]); + +### How many tokens to gobble after macro +# Each macro is assumed to gobble up a given number of +# tokens (or {...} groups), as well as options [...] before, within +# and after. The %TeXmacro hash gives a link from a macro +# (or beginNAME for begin-end groups without the backslash) +# to either an integer giving the number of tokens to ignore +# or to an array (specified as [num,num,...]) of length N where +# N is the number of tokens to be read with the macro and the +# array values tell how each is to be interpreted (see the status +# values: 0=ignore, 1=count, etc.). Thus specifying a number N is +# equivalent to specifying an array [0,...,0] of N zeros. +# +# For macros not specified here, the default value is 0: i.e. +# no tokens are excluded, but [...] options are. Header macros +# specified in %TeXheader are automatically included here. +my %TeXmacro=(%TeXheader, + '\documentclass'=>1,'\documentstyle'=>1,'\usepackage'=>1, '\hyphenation'=>1, + '\pagestyle'=>1,'\thispagestyle'=>1, '\pagenumbering'=>1,'\markboth'=>1, '\markright'=>1, + '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3], + '\newenvironment'=>[-3,-3,-3], 'renewenvironment'=>[-3,-3,-3], + '\newfont'=>2,'\newtheorem'=>2,'\bibliographystyle'=>1, '\bibliography'=>1, + '\parbox'=>1, '\marginpar'=>[3],'\makebox'=>0, '\raisebox'=>1, '\framebox'=>0, + '\newsavebox'=>1, '\sbox'=>1, '\savebox'=>2, '\usebox'=>1,'\rule'=>2, + '\footnote'=>[3],'\label'=>1, '\ref'=>1, '\pageref'=>1, '\bibitem'=>1, + '\cite'=>1, '\citep'=>1, '\citet'=>1, '\citeauthor'=>1, '\citealt'=>1, '\nocite'=>1, + '\eqlabel'=>1, '\eqref'=>1,'\hspace'=>1, '\vspace'=>1, '\addvspace'=>1, + '\input'=>1, '\include'=>1, '\includeonly'=>1,'\includegraphics'=>1, + '\newlength'=>1, '\setlength'=>2, '\addtolength'=>2,'\settodepth'=>2, + '\settoheight'=>2, '\settowidth'=>2,'\newcounter'=>1, '\setcounter'=>2, + '\addtocounter'=>2,'\stepcounter'=>1, '\refstepcounter'=>1, '\usecounter'=>1, + '\alph'=>1, '\arabic'=>1, '\fnsymbol'=>1, '\roman'=>1, '\value'=>1, + '\cline'=>1, '\multicolumn'=>3,'\typeout'=>1, '\typein'=>1, + 'beginlist'=>2, 'beginminipage'=>1, 'begintabular'=>1, + 'beginthebibliography'=>1,'beginlrbox'=>1, + '\begin'=>1,'\end'=>1,'\title'=>[2]); + +### Macros that should be counted as one or more words +# Macros that represent text may be declared here. The value gives +# the number of words the macro represents. +my %TeXmacroword=('\LaTeX'=>1,'\TeX'=>1); + +### Macros that are counted within the preamble +# The preamble is the text between \documentclass and \begin{document}. +# Text and macros in the preamble is ignored unless specified here. The +# value is the status (1=text, 2=header, etc.) they should be interpreted as. +# Note that only the first unit (token or {...} block) is counted. +my %TeXpreamble=('\title'=>[2], + '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3], + '\newenvironment'=>[-3,-3,-3], 'renewenvironment'=>[-3,-3,-3], + ); + +### Begin-End groups +# Identified as begin-end groups, and define =>state. The +# states used corresponds to the elements of the count array, and +# are: +# 0: Not included +# 1: Text, word included in text count +# 2: Header, words included in header count +# 3: Float caption, words included in float caption count +# 6: Inline mathematics, words not counted +# 7: Displayed mathematics, words not counted +# -1: Float, not included, but looks for captions +# +# 4 and 5 are used to count number of headers and floats +# and are not used as states. +# +# Groups that are not defined will be counted as the surrounding text. +# +# Note that some environments may only exist within math-mode, and +# therefore need not be defined here: in fact, they should not as it +# is not clear if they will be in inlined or displayed math. +# +my %TeXgroup=('document'=>1,'letter'=>1,'titlepage'=>0, + 'center'=>1,'flushleft'=>1,'flushright'=>1, + 'abstract'=>1,'quote'=>1,'quotation'=>1,'verse'=>1,'minipage'=>1,'verbatim'=>1, + 'description'=>1,'enumerate'=>1,'itemize'=>1,'list'=>1, + 'theorem'=>1,'lemma'=>1,'definition'=>1,'corollary'=>1,'example'=>1, + 'math'=>6,'displaymath'=>7,'equation'=>7,'eqnarray'=>7,'align'=>7, + 'figure'=>-1,'float'=>-1,'picture'=>-1,'table'=>-1, + 'tabbing'=>0,'tabular'=>0,'thebibliography'=>0,'lrbox'=>0); + +### In floats: include only specific macros +# Macros used to identify caption text within floats. +my %TeXfloatinc=('\caption'=>[3]); + +### Macros for including tex files +# Allows \macro{file} or \macro file. If the value is 0, the filename will +# be used as is; if it is 1, the filetype .tex will be added if the +# filename is without filetype; if it is 2, the filetype .tex will be added. +my %TeXfileinclude=('\input'=>1,'\include'=>2); + +### Count labels +# Labels used to describe the counts +my @countlabel=('Files','Words in text','Words in headers', + 'Words in float captions','Number of headers','Number of floats', + 'Number of math inlines','Number of math displayed'); + +### Break points +# Definition of macros that define break points that start a new subcount. +# The values given are used as labels. +my %BreakPointsOptions; +$BreakPointsOptions{'none'}={}; +$BreakPointsOptions{'part'}={%{$BreakPointsOptions{'none'}},'\part'=>'Part'}; +$BreakPointsOptions{'chapter'}={%{$BreakPointsOptions{'part'}},'\chapter'=>'Chapter'}; +$BreakPointsOptions{'section'}={%{$BreakPointsOptions{'chapter'}},'\section'=>'Section'}; +$BreakPointsOptions{'subsection'}={%{$BreakPointsOptions{'section'}},'\subsection'=>'Subsection'}; +$BreakPointsOptions{'default'}=$BreakPointsOptions{'subsection'}; +my %BreakPoints=%{$BreakPointsOptions{'none'}}; + +### Print styles +# Definition of different print styles: maps of class labels +# to ANSI codes. Class labels are as used by HTML styles. +my @STYLES=(); +my %STYLE; +$STYLES[0]={'error'=>'bold red'}; +$STYLES[1]={%{$STYLES[0]}, + 'word1'=>'blue','word2'=>'bold blue','word3'=>'blue', + 'grouping'=>'red','document'=>'red','mathgroup'=>'magenta', + 'state'=>'cyan underline','sumcount'=>'yellow'}; +$STYLES[2]={%{$STYLES[1]}, + 'command'=>'green','exclcommand'=>'yellow','exclgroup'=>'yellow','exclmath'=>'yellow', + 'ignore'=>'cyan'}; +$STYLES[3]={%{$STYLES[2]}, + 'tc'=>'bold yellow','comment'=>'yellow','option'=>'yellow', + 'fileinclude'=>'bold green'}; +$STYLES[4]={%{$STYLES[3]}}; + +### Word regexp pattern list +# List of regexp patterns that should be analysed as words. +# Use @ to represent a letter, will be substituted with $LetterPattern. +my @WordPatterns=('(@+\.)+@+\.?','@+([\-\']@+)*'); +my $specialchars='\\\\(ae|AE|o|O|aa|AA)'; +my $modifiedchars='\\\\[\'\"\`\~\^\=](\w|\{\w\})'; +my $LetterPattern='\w'; +my $LetterPatternRelaxed='([\w\-\']|'.$modifiedchars.'|'.$specialchars.'(\{\})?|\{'.$specialchars.'\}|\{\w\})'; +my %NamedWordPattern; +$NamedWordPattern{'chinese'}='\p{script=Han}'; +$NamedWordPattern{'japanese'}='(\p{script=Han}|\p{script=Hiragana}|\p{script=Katakana})'; + +### Macro option regexp list +# List of regexp patterns to be gobbled as macro option in and after +# a macro. +my @MacroOptionPatterns=('\[(\w|[,\-\s\~\.\:\;\+\?\*\_\=])*\]'); +my @MacroOptionPatternsRelaxed=('\[[^\[\]\n]*\]'); + +###### Main script + + +################################################### + +MAIN(@ARGV); + +################################################### + + +######### +######### Main routines +######### + +# MAIN ROUTINE: Handle arguments, then parse files +sub MAIN { + my @args=@_; + my @toplevelfiles=Parse_Arguments(@args); + Apply_Options(); + if (scalar(@toplevelfiles)==0) { + conditional_print_help_style() + || print_error("No files specified.","p","error"); + } else { + conditional_print_help_style(); + my $totalcount=parse_file_list(@toplevelfiles); + conditional_print_total($totalcount); + } + Report_ErrorCount(); + Close_Output(); +} + +# Checks arguments, exits on exit condition +sub Check_Arguments { + my @args=@_; + if (!@args) { + print_version(); + print_syntax(); + print_reference(); + exit; + } elsif ($args[0]=~/^(\-?\-(h|\?|help)|\/(\?|h))$/) { + print_help(); + exit; + } elsif ($args[0]=~/^\-?\-(ver|version)$/) { + print_version(); + exit; + } elsif ($args[0]=~/^\-?\-(lic|license)$/) { + print_license(); + exit; + } + return 1; +} + +# Parses arguments, sets options (global) and returns file list +sub Parse_Arguments { + my @args=@_; + Check_Arguments(@args); + my @files; + foreach my $arg (@ARGV) { + if (Parse_Option($arg)) {next;} + if ($arg=~/^\-/) { + print 'Invalid opton '.$arg."\n"; + print_syntax(); + exit; + } + $arg=~s/\\/\//g; + push @files,$arg; + } + return @files; +} + +# Parse individual option parameters +sub Parse_Option { + my $arg=shift @_; + return parse_options_parsing($arg) + || parse_options_sums($arg) + || parse_options_output($arg) + || parse_options_format($arg) + ; +} + +sub parse_options_parsing { + my $arg=shift @_; + if ($arg eq '-inc') {$includeTeX=1;} + elsif ($arg eq '-noinc') {$includeTeX=0;} + elsif ($arg eq '-dir') {$globalworkdir=undef;} + elsif ($arg=~/^-dir=(.*)$/) {$globalworkdir=$1;} + elsif ($arg=~/^-(utf8|unicode)$/) {$utf8flag=1;} + elsif ($arg=~/^-(ch|chinese|zhongwen)$/) { + $utf8flag=1; + @WordPatterns=($NamedWordPattern{'chinese'},@WordPatterns); + } + elsif ($arg=~/^-(jp|japanese)$/) { + $utf8flag=1; + @WordPatterns=($NamedWordPattern{'japanese'},@WordPatterns); + } + elsif ($arg eq '-relaxed') { + @MacroOptionPatterns=@MacroOptionPatternsRelaxed; + $LetterPattern=$LetterPatternRelaxed; + } + else {return 0;} + return 1; +} + +sub parse_options_sums { + my $arg=shift @_; + if ($arg=~/^-sum(=(.+))?$/) {option_sum($2);} + elsif ($arg=~/^-(sub|subcounts?)(=(.+))?$/) {option_subcount($3);} + else {return 0;} + return 1; +} + +sub option_subcount { + my $arg=shift @_; + $showsubcounts=1; + if (!defined $arg) { + %BreakPoints=%{$BreakPointsOptions{'default'}}; + } elsif (my $option=$BreakPointsOptions{$arg}) { + %BreakPoints=%{$option}; + } else { + print STDERR "Warning: Option value ".$arg." not valid, using default instead.\n"; + %BreakPoints=%{$BreakPointsOptions{'default'}}; + } +} + +sub option_sum { + my $arg=shift @_; + if (!defined $arg) { + @sumweights=(1,1,1,0,0,1,1); + } elsif ($arg=~/^(\d+(,\d+){0,6})$/) { + @sumweights=split(',',$1); + } else { + print STDERR "Warning: Option value ".$arg." not valid, ignoring option.\n"; + } +} + +sub parse_options_format { + my $arg=shift @_; + if ($arg eq '-brief') {$briefsum=1;} + elsif ($arg eq '-total') {$totalflag=1;} + elsif ($arg eq '-1') {$briefsum=1;$totalflag=1;$verbose=-1;} + elsif ($arg eq "-html" ) {option_no_colours();$htmlstyle = 2;} + elsif ($arg eq "-htmlcore" ) {option_no_colours();$htmlstyle = 1;} + elsif ($arg=~/^\-(nocol|nc$)/) {option_no_colours();} + elsif ($arg eq '-codes') { + $showcodes=2; + if ($verbose==0) {$verbose=3;} + } + elsif ($arg eq '-nocodes') {$showcodes=0;} + else {return 0;} + return 1; +} + +sub parse_options_output { + my $arg=shift @_; + if ($arg eq "-v0") {$verbose=0;} + elsif ($arg eq "-v1") {$verbose=1;} + elsif ($arg eq '-vv' || $arg eq '-v2') {$verbose=2;} + elsif ($arg eq '-vvv' || $arg eq '-v3' || $arg eq '-v') {$verbose=3;} + elsif ($arg eq '-vvvv' || $arg eq '-v4') {$verbose=3; $showstates=1;} + elsif ($arg =~ /^\-showstates?$/ ) {$showstates=1;} + elsif ($arg =~ /^-(q|-?quiet)$/ ) {$verbose=-1;} + else {return 0;} + return 1; +} + +# Parse file list and return total count +sub parse_file_list { + my @filelist=@_; + my $listtotalcount=new_count("TOTAL COUNT"); + for my $file (<@filelist>) { + my $filetotalcount=parse_file($file); + add_count($listtotalcount,$filetotalcount); + } + return $listtotalcount; +} + +# Parse file and included files, and return total count +sub parse_file { + my $file=shift @_; + $workdir=$globalworkdir; + if (!defined $workdir) { + $workdir=$file; + $workdir =~ s/^((.*[\\\/])?)[^\\\/]+$/$1/; + } + @filelist=($file); + if ($htmlstyle) {print "\n<div class='filegroup'>\n";} + my $filetotalcount=new_count("SUM COUNT FOR ".$file); + foreach my $f (@filelist) { + my $tex=TeXfile($f); + my $fpath=$f; + $fpath=~s/^((.*[\\\/])?)[^\\\/]+$/$1/; + if (!defined $tex) { + print STDERR "File not found or not readable: ".$f."\n"; + formatprint("File not found or not readable: ".$f."\n","p","error"); + } else { + parse($tex); + my $filecount=add_subcount($tex); + if (!$totalflag) { + print_count($filecount); + print "\n"; + } + add_count($filetotalcount,$filecount); + } + } + if ($htmlstyle) {print "</div>\n\n";} + return $filetotalcount; +} + + +###### +###### Subroutines +###### + +###### CMD specific implementations + + +sub add_file_to_list { + my $fname=shift @_; + push @filelist,$workdir.$fname; +} + +sub print_with_style { + my ($text,$style,$colour)=@_; + #if ($utf8flag || $htmlstyle) {utf8::encode($text);} + if ($htmlstyle) { + print "<span class='".$style."'>".$text."</span>"; + } else { + print Term::ANSIColor::colored($text,$colour); + } +} + +sub option_no_colours { + $ENV{'ANSI_COLORS_DISABLED'} = 1; +} + +# Print count (total) if conditions are met +sub conditional_print_total { + my $sumcount=shift @_; + if ($totalflag || get_count($sumcount,0)>1) { + if ($totalflag && $briefsum && @sumweights) { + print total_count($sumcount); + } else { + if ($htmlstyle) { + formatprint("Total word count",'h2'); + } + print_count($sumcount); + } + } +} + +###### Option handling + + +# Apply options to set values +sub Apply_Options { + %STYLE=%{$STYLES[$verbose]}; + if ($utf8flag) {binmode STDOUT,':utf8';} + if ($htmlstyle>1) {html_head();} + foreach (@WordPatterns) { + s/\@/$LetterPattern/g; + } +} + + +###### TeX code handle + + +sub TeXfile { + my $filename=shift @_; + my $file=read_file($filename) || return undef; + return TeXcode($file,$filename); +} + +sub read_file { + my $filename=shift @_; + if ($utf8flag) { + open(FH,"<:utf8",$filename) || return undef; + } else { + open(FH,"<".$filename) || return undef; + } + if ($verbose>0) { + formatprint("File: ".$filename."\n",'h2'); + $blankline=0; + } + my @text=<FH>; + close(FH); + my $latexcode=join('',@text); + if ($utf8flag) { + $latexcode =~ s/^\x{feff}//; + } + return $latexcode; +} + +###### Parsing routines + + +# Make TeXcode handle +sub TeXcode { + my ($texcode,$filename,$title)=@_; + my %TeX=(); + $TeX{'filename'}=$filename; + if (!defined $filename) { + $TeX{'filepath'}=''; + } elsif ($filename=~/^(.*[\\\/])[^\\\/]+$/) { + $TeX{'filepath'}=$1; + } else { + $TeX{'filepath'}=''; + } + if (defined $title) {} + elsif (defined $filename) {$title="FILE: ".$filename;} + else {$title="Word count";} + $TeX{'line'}=$texcode; + $TeX{'next'}=undef; + $TeX{'type'}=undef; + $TeX{'style'}=undef; + $TeX{'printstate'}=undef; + $TeX{'eof'}=0; + my $countsum=new_count($title); + $TeX{'countsum'}=$countsum; + my $count=new_count("_top_"); + $TeX{'count'}=$count; + inc_count(\%TeX,0); + my @countlist=(); + $TeX{'countlist'}=\@countlist; + $countsum->{'subcounts'}=\@countlist; + return \%TeX; +} + +# Parse LaTeX document +sub parse { + my ($tex)=@_; + if ($htmlstyle && $verbose) {print "<p class=parse>\n";} + while (!($tex->{'eof'})) { + parse_unit($tex,1); + } + if ($htmlstyle && $verbose) {print "</p>\n";} +} + +# Parse one block or unit +sub parse_unit { + # Status: + # 0 = exclude from count + # 1 = text + # 2 = header text + # 3 = float text + # -1 = float (exclude) + # -2 = strong exclude, ignore begin-end groups + # -3 = stronger exclude, do not parse macro parameters + # -9 = preamble (between \documentclass and \begin{document}) + my ($tex,$status,$end)=@_; + if (!defined $status) { + print_error("CRITICAL ERROR: Undefined parser status!"); + exit; + } elsif (ref($status) eq 'ARRAY') { + print_error("CRITICAL ERROR: Invalid parser status!"); + exit; + } + my $substat; + if ($showstates) { + if (defined $end) { + $tex->{'printstate'}=':'.$status.':'.$end.':'; + } else { + $tex->{'printstate'}=':'.$status.':'; + } + flush_next($tex); + } + while (defined (my $next=next_token($tex))) { + # parse next token; or tokens until match with $end + set_style($tex,"ignore"); + if ((defined $end) && ($end eq $next)) { + # end of unit + return; + } elsif (!defined $next) { + print_error("ERROR: End of file while waiting for ".$end); + return; + } + if ($status==-9 && $next eq '\begin' && $tex->{'line'}=~/^\{\s*document\s*\}/) { + # \begin{document} + $status=1; + } + if ($next eq '\documentclass') { + # starts preamble + set_style($tex,'document'); + gobble_option($tex); + gobble_macro_parms($tex,1); + while (!($tex->{'eof'})) { + parse_unit($tex,-9); + } + } elsif ($tex->{'type'}==666) { + # parse TC instructions + parse_tc($tex); + } elsif ($tex->{'type'}==1) { + # word + if ($status>0) { + inc_count($tex,$status); + set_style($tex,'word'.$status); + } + } elsif ($next eq '{') { + # {...} + parse_unit($tex,$status,'}'); + } elsif ($tex->{'type'}==3 && $status==-3) { + set_style($tex,'ignore'); + } elsif ($tex->{'type'}==3) { + # macro call + parse_macro($tex,$next,$status,$substat); + } elsif ($next eq '$') { + # math inline + parse_math($tex,$status,6,'$'); + } elsif ($next eq '$$') { + # math display (unless already in inlined math) + if (!(defined $end && $end eq '$')) { + parse_math($tex,$status,7,'$$'); + } + } + if (!defined $end) {return;} + } +} + +sub parse_macro { + my ($tex,$next,$status,$substat)=@_; + if (my $label=$BreakPoints{$next}) { + if ($tex->{'line'}=~ /^[*]?(\s*\[.*?\])*\s*\{(.+?)\}/ ) { + $label=$label.': '.$2; + } + add_subcount($tex,$label); + } + set_style($tex,$status>0?'command':'exclcommand'); + if ($next eq '\begin' && $status!=-2) { + parse_begin_end($tex,$status); + } elsif (($status==-1) && ($substat=$TeXfloatinc{$next})) { + # text included from float + set_style($tex,'command'); + gobble_macro_parms($tex,$substat); + } elsif ($status==-9 && defined ($substat=$TeXpreamble{$next})) { + # parse preamble include macros + set_style($tex,'command'); + if (defined $TeXheader{$next}) {inc_count($tex,4);} + gobble_macro_parms($tex,$substat,1); + } elsif ($status<0) { + # ignore + gobble_option($tex); + } elsif ($next eq '\(') { + # math inline + parse_math($tex,$status,6,'\)'); + } elsif ($next eq '\[') { + # math display + parse_math($tex,$status,7,'\]'); + } elsif ($next eq '\def') { + # ignore \def... + $tex->{'line'} =~ s/^([^\{]*)\{/\{/; + flush_next($tex); + print_style($1.' ','ignore'); + parse_unit($tex,-2); + } elsif (defined (my $addsuffix=$TeXfileinclude{$next})) { + # include file: queue up for parsing + parse_include_file($tex,$status,$addsuffix); + } elsif (defined ($substat=$TeXmacro{$next})) { + # macro: exclude options + if (defined $TeXheader{$next}) {inc_count($tex,4);} + gobble_macro_parms($tex,$substat,$status); + } elsif (defined ($substat=$TeXmacroword{$next})) { + # count macro as word (or a given number of words) + inc_count($tex,$status,$substat); + set_style($tex,'word'.$status); + } elsif ($next =~ /^\\[^\w\_]/) { + } else { + gobble_option($tex); + } +} + +sub parse_tc { + my ($tex)=@_; + my $next=$tex->{'next'}; + set_style($tex,'tc'); + flush_next($tex); + if (!($next=~s/^\%+TC:\s*(\w+)\s*// )) { + print_error('Warning: TC command should have format %TC:instruction [macro] [parameters]'); + return; + }; + my $instr=$1; + if ($instr=~/^(break)$/) { + if ($instr eq 'break') {add_subcount($tex,$next);} + } elsif ($next=~/^([\\]*\w+)\s+([^\s\n]+)(\s+([0-9]+))?/) { + # Format = TC:word macro + my $macro=$1; + my $param=$2; + my $option=$4; + if ($param=~/^\[([0-9,]+)\]$/) {$param=[split(',',$1)];} + if (($instr eq 'macro') || ($instr eq 'exclude')) {$TeXmacro{$macro}=$param;} + elsif ($instr eq 'header') {$TeXheader{$macro}=$param;$TeXmacro{$macro}=$param;} + elsif ($instr eq 'macroword') {$TeXmacroword{$macro}=$param;} + elsif ($instr eq 'preambleinclude') {$TeXpreamble{$macro}=$param;} + elsif ($instr eq 'group') { + $TeXmacro{'begin'.$macro}=$param; + $TeXgroup{$macro}=$option; + } + elsif ($instr eq 'floatinclude') {$TeXfloatinc{$macro}=$param;} + elsif ($instr eq 'fileinclude') {$TeXfileinclude{$macro}=$param;} + elsif ($instr eq 'breakmacro') {$BreakPoints{$macro}=$param;} + else {print_error("Warning: Unknown TC command: ".$instr);} + } elsif ($instr eq 'ignore') { + tc_ignore_input($tex); + } else { + print_error("Warning: Invalid TC command format: ".$instr); + } +} + +sub tc_ignore_input { + my ($tex)=@_; + set_style($tex,'ignore'); + parse_unit($tex,-3,"%TC:endignore"); + set_style($tex,'tc'); + flush_next($tex); +} + +sub parse_math { + my ($tex,$status,$substat,$end)=@_; + my $localstyle=$status>0 ? 'mathgroup' : 'exclmath'; + if ($status>0) {inc_count($tex,$substat);} + set_style($tex,$localstyle); + parse_unit($tex,0,$end); + set_style($tex,$localstyle); +} + +sub parse_begin_end { + my ($tex,$status)=@_; + my $localstyle=$status>0 ? 'grouping' : 'exclgroup'; + flush_style($tex,$localstyle); + gobble_option($tex); + my $groupname; + if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\*?\}[ \t\r\f]*//) { + # gobble group type + $groupname=$1; + print_style('{'.$1.'}',$localstyle); + my $next='begin'.$1; + if (defined (my $substat=$TeXmacro{$next})) { + gobble_macro_parms($tex,$substat); + } + } else { + print_error("Warning: BEGIN group without type."); + } + # find group status (or leave unchanged) + my $substat; + defined ($substat=$TeXgroup{$1}) || ($substat=$status); + if ($status<=0 && $status<$substat) {$substat=$status;} + if (($status>0) && ($substat==-1)) { + # Count float + inc_count($tex,5); + } + if ($status>0 and $substat>3) { + # count item, exclude contents + inc_count($tex,$substat); + $substat=0; + } + parse_unit($tex,$substat,'\end'); + if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\}[ \t\r\f]*//) { + # gobble group type + flush_style($tex,$localstyle); + print_style('{'.$1.'}',$localstyle); + } else { + print_error("Warning: END group without type while waiting to end ".$groupname."."); + } +} + +sub parse_include_file { + my ($tex,$status,$addsuffix)=@_; + $tex->{'line'} =~ s/^\{([^\{\}\s]+)\}// || + $tex->{'line'} =~ s/^\s*([^\{\}\%\\\s]+)// || + return; + flush_next($tex); + if ($status>0) { + print_style($&,'fileinclude'); + my $fname=$1; + if ($addsuffix==2) {$fname.='.tex';} + elsif ($addsuffix==1 && ($fname=~/^[^\.]+$/)) {$fname.='.tex';} + if ($includeTeX) {add_file_to_list($fname);} + } else { + print_style($&,'ignored'); + } +} + +sub gobble_option { + my $tex=shift @_; + flush_next($tex); + foreach my $pattern (@MacroOptionPatterns) { + if ($tex->{'line'}=~s/^($pattern)//) { + print_style($1,'option'); + return $1; + } + } + return undef; +} + +sub gobble_options { + while (gobble_option(@_)) {} +} + +sub gobble_macro_modifier { + my $tex=shift @_; + flush_next($tex); + if ($tex->{'line'} =~ s/^\*//) { + print_style($1,'option'); + return $1; + } + return undef; +} + +sub gobble_macro_parms { + my ($tex,$parm,$oldstat)=@_; + my $i; + if (ref($parm) eq 'ARRAY') { + $i=scalar @{$parm}; + } else { + $i=$parm; + $parm=[0,0,0,0,0,0,0,0,0]; + } + if ($i>0) {gobble_macro_modifier($tex);} + gobble_options($tex); + for (my $j=0;$j<$i;$j++) { + parse_unit($tex,new_status($parm->[$j],$oldstat)); + gobble_options($tex); + } +} + +sub new_status { + my ($substat,$old)=@_; + if (!defined $old) {return $substat;} + if ($old==-3 || $substat==-3) {return -3;} + if ($old==-2 || $substat==-2) {return -2;} + if ($old==0 || $substat==0) {return 0;} + if ($old==-9 || $substat==-9) {return -9;} + if ($old>$substat) {return $old;} + return $substat; +} + +sub next_token { + my $tex=shift @_; + my ($next,$type); + if (defined $tex->{'next'}) {print_style($tex->{'next'}.' ',$tex->{'style'});} + $tex->{'style'}=undef; + while (defined ($next=get_next_token($tex))) { + $type=$tex->{'type'}; + if ($type==0) { + print_style($next,'comment'); + } elsif ($type==9) { + if ($verbose>0) {line_return(1,$tex);} + } else { + return $next; + } + } + return $next; +} + + +sub get_next_token { + # Token (or token group) category: + # 0: comment + # 1: word (or other forms of text or text components) + # 2: symbol (not word, e.g. punctuation) + # 3: macro + # 4: curly braces {} + # 5: brackets [] + # 6: maths + # 9: line break in file + # 999: end of line or blank line + # 666: TeXcount instruction (%TC:instruction) + my $tex=shift @_; + my $next; + (defined ($next=get_token($tex,'\%+TC:\s*endignore\b[^\r\n]*',666))) && return "%TC:endignore"; + (defined ($next=get_token($tex,'\%+TC:[^\r\n]*',666))) && return $next; + (defined ($next=get_token($tex,'\%[^\r\n]*',0))) && return $next; + (defined ($next=get_token($tex,'(\r|\n|\r\n)',9))) && return $next; + (defined ($next=get_token($tex,'\\\\[\{\}]',2))) && return $next; + foreach my $pattern (@WordPatterns) { + (defined ($next=get_token($tex,$pattern,1))) && return $next; + } + (defined ($next=get_token($tex,'[\"\'\`:\.,\(\)\[\]!\+\-\*=/\^\_\@\<\>\~\#\&]',2))) && return $next; + (defined ($next=get_token($tex,'\\\\([a-zA-Z_]+|[^a-zA-Z_])',3))) && return $next; + (defined ($next=get_token($tex,'[\{\}]',4))) && return $next; + (defined ($next=get_token($tex,'[\[\]]',5))) && return $next; + (defined ($next=get_token($tex,'\$\$',6))) && return $next; + (defined ($next=get_token($tex,'\$',6))) && return $next; + (defined ($next=get_token($tex,'.',999))) && return $next; + (defined ($next=get_token($tex,'[^\s]+',999))) && return $next; + $tex->{'eof'}=1; + return undef; +} + +sub get_token { + my ($tex,$regexp,$type)=@_; + if (!defined $regexp) {print_error("ERROR in get_token: undefined regex.");} + if (!defined $tex->{'line'}) {print_error("ERROR in get_token: undefined tex-line. ".$tex->{'next'});} + if ( $tex->{'line'} =~ s/^($regexp)[ \t\r\f]*// ) { + $tex->{'next'}=$1; + $tex->{'type'}=$type; + return $1; + } + return undef; +} + +###### Count handling routines + + +sub new_count { + my ($title)=@_; + my @cnt=(0,0,0,0,0,0,0,0); + my %count=('count'=>\@cnt,'title'=>$title); + # files, text words, header words, float words, + # headers, floats, math-inline, math-display; + return \%count; +} + +sub inc_count { + my ($tex,$type,$value)=@_; + my $count=$tex->{'count'}; + if (!defined $value) {$value=1;} + ${$count->{'count'}}[$type]+=$value; +} + +sub get_count { + my ($count,$type)=@_; + return ${$count->{'count'}}[$type]; +} + +sub total_count { + my ($count)=@_; + my $sum=0; + for (my $i=scalar(@sumweights);$i-->0;) { + $sum+=get_count($count,$i+1)*$sumweights[$i]; + } + return $sum; +} + +sub print_count { + my ($count,$header)=@_; + if ($briefsum && @sumweights) { + print_count_total($count,$header); + } elsif ($briefsum) { + if ($htmlstyle) {print "<p class='briefcount'>";} + print_count_brief($count,$header); + if ($htmlstyle) {print "</p>\n";} + } else { + print_count_details($count,$header); + } +} + +sub print_count_with_header { + my ($count,$header)=@_; + if (!defined $header) {$header=$count->{'title'};} + if (!defined $header) {$header="";} + return $count,$header; +} + +sub print_count_total { + my ($count,$header)=print_count_with_header(@_); + if ($htmlstyle) {print "<p class='count'>".$header;} + print total_count($count); + if ($htmlstyle) {print "</p>\n";} + else {print ": ".$header;} +} + +sub print_count_brief { + my ($count,$header)=print_count_with_header(@_); + my $cnt=$count->{'count'}; + print ${$cnt}[1]."+".${$cnt}[2]."+".${$cnt}[3]. + " (".${$cnt}[4]."/".${$cnt}[5]."/".${$cnt}[6]."/".${$cnt}[7].") ". + $header; +} + +sub print_count_details { + my ($count,$header)=print_count_with_header(@_); + if ($htmlstyle) {print "<dl class='count'>\n";} + if (defined $header) { + formatprint($header."\n",'dt','header'); + } + if (get_count($count,0)>1) { + formatprint($countlabel[0].': ','dt'); + formatprint(get_count($count,0)."\n",'dd'); + } + if (@sumweights) { + formatprint('Sum count: ','dt'); + formatprint(total_count($count)."\n",'dd'); + } + for (my $i=1;$i<8;$i++) { + formatprint($countlabel[$i].': ','dt'); + formatprint(get_count($count,$i)."\n",'dd'); + } + my $subcounts=$count->{'subcounts'}; + if ($showsubcounts && defined $subcounts && scalar(@{$subcounts})>1) { + formatprint("Subcounts: text+headers+captions (#headers/#floats/#inlines/#displayed)\n",'dt'); + foreach my $subcount (@{$subcounts}) { + if ($htmlstyle) {print "<dd class='briefcount'>";} + print_count_brief($subcount); + if ($htmlstyle) {print "</dd>";} + print "\n"; + } + } + if ($htmlstyle) {print "</dl>\n";} +} + +sub add_count { + my ($a,$b)=@_; + for (my $i=0;$i<8;$i++) { + ${$a->{'count'}}[$i]+=${$b->{'count'}}[$i]; + } +} + +sub add_subcount { + my ($tex,$title)=@_; + add_count($tex->{'countsum'},$tex->{'count'}); + push @{$tex->{'countlist'}},$tex->{'count'}; + $tex->{'count'}=new_count($title); + return $tex->{'countsum'}; +} + +###### Printing routines + + +sub set_style { + my ($tex,$style)=@_; + if (!(($tex->{'style'}) && ($tex->{'style'} eq '-'))) {$tex->{'style'}=$style;} +} + +sub flush_style { + my ($tex,$style)=@_; + set_style($tex,$style); + flush_next($tex); +} + +sub line_return { + my ($blank,$tex)=@_; + if ($blank>$blankline) { + if ((defined $tex) && @sumweights) { + my $num=total_count($tex->{'count'}); + print_style(" [".$num."]","sumcount"); + } + linebreak(); + $blankline++; + } +} + +sub linebreak { + if ($htmlstyle) {print "<br>\n";} else {print "\n";} +} + +sub print_style { + my ($text,$style,$state)=@_; + (($verbose>=0) && (defined $text) && (defined $style)) || return 0; + my $colour; + ($colour=$STYLE{$style}) || return; + if (($colour) && !($colour eq '-')) { + print_with_style($text,$style,$colour); + if ($state) { + print_style($state,'state'); + } + $blankline=-1; + return 1; + } else { + return 0; + } +} + +sub print_error { + my $text=shift @_; + $errorcount++; + if ($verbose>=0) { + line_return(1); + print_style("!!! ".$text." !!!",'error'); + line_return(1); + } +} + +sub formatprint { + my ($text,$tag,$class)=@_; + my $break=($text=~s/\n$//); + if ($htmlstyle && defined $tag) { + print '<'.$tag; + if ($class) {print " class='".$class."'";} + print '>'.$text.'</'.$tag.'>'; + } else { + print $text; + } + if ($break) {print "\n";} +} + +sub flush_next { + my $tex=shift @_; + if (defined $tex->{'next'}) { + print_style($tex->{'next'}.' ',$tex->{'style'},$tex->{'printstate'}); + } + $tex->{'printstate'}=undef; + $tex->{'style'}='-'; +} + + +# Close the output, e.g. adding HTML tail +sub Close_Output { + if ($htmlstyle>1) { + html_tail(); + } +} + + +# Report if there were any errors occurring during parsing +sub Report_ErrorCount { + if ($errorcount==0) {return;} + if ($briefsum && $totalflag) {print " ";} + if ($htmlstyle) { + print_error("Errors:".$errorcount,"p","error"); + } else { + print "(errors:".$errorcount.")"; + } +} + + +sub print_help_style { + if ($verbose<=0) {return;} + formatprint("Format/colour codes of verbose output:","h2"); + print "\n\n"; + if ($htmlstyle) {print "<p class='stylehelp'>";} + help_style_line('Text which is counted',"word1","counted as text words"); + help_style_line('Header and title text',"word2","counted as header words"); + help_style_line('Caption text and footnotes',"word3","counted as caption words"); + help_style_line("Ignored text or code","ignore","excluded or ignored"); + help_style_line('\documentclass',"document","document start, beginning of preamble"); + help_style_line('\macro',"command","macro not counted, but parameters may be"); + help_style_line('\macro',"exclcommand","macro in excluded region"); + help_style_line("[Macro options]","option","not counted"); + help_style_line('\begin{group} \end{group}',"grouping","begin/end group"); + help_style_line('\begin{group} \end{group}',"exclgroup","begin/end group in excluded region"); + help_style_line('$ $',"mathgroup","counted as one equation"); + help_style_line('$ $',"exclmath","equation in excluded region"); + help_style_line('% Comments',"comment","not counted"); + help_style_line('%TC:TeXcount instructions',"tc","not counted"); + help_style_line("File to include","fileinclude","not counted but file may be counted later"); + if ($showstates) { + help_style_line('[state]',"state","internal TeXcount state"); + } + if (@sumweights) { + help_style_line('[sumcount]',"sumcount","cumulative sum count"); + } + help_style_line("ERROR","error","TeXcount error message"); + if ($htmlstyle) {print "</p>";} + print "\n\n"; +} + +sub help_style_line { + my ($text,$style,$comment)=@_; + if ($htmlstyle) { + $comment=" .... ".$comment; + } else { + $comment=" .... ".$comment; + } + if (print_style($text,$style)) { + print $comment; + linebreak(); + } +} + +# Print output style codes if conditions are met +sub conditional_print_help_style { + if ($showcodes) {print_help_style();} + return $showcodes; +} + +###### HTML routines + + + +sub html_head { + print "<html>\n<head>"; + if ($utf8flag) { + print "\n<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">"; + } + print ' +<style> +<!-- +body {width:auto;padding:5;margin:5;} +.error {font-weight:bold;color:#f00;font-style:italic;} +.word1,.word2,.word3 {color: #009;} +.word2 {font-weight: 700;} +.word3 {font-style: italic;} +.command {color: #c00;} +.exclcommand {color: #f99;} +.option {color: #cc0;} +.grouping, .document {color: #900; font-weight:bold;} +.mathgroup {color: #090;} +.exclmath {color: #6c6;} +.ignore {color: #999;} +.exclgroup {color:#c66;} +.tc {color: #999; font-weight:bold;} +.comment {color: #999; font-style: italic;} +.state {color: #990; font-size: 70%;} +.sumcount {color: #999; font-size: 80%;} +.fileinclude {color: #696; font-weight:bold;} +dl.count {background: #cfc; color: 009;} +dl.count dt.header {font-weight: bold; font-style: italic; float: none;} +dl.count dt {clear: both; float: left; margin-right: .5em;} +dl.count dd {font-weight: bold;} +dl.count dd.briefcount {font-weight: 700; clear: both; font-size:80%; font-weight:normal; margin-left:8pt;} +.warning {color: #c00; font-weight: 700;} +.parse, .count, .stylehelp, .filegroup {border: solid 1px #999; margin: 0pt; padding: 4pt;} +.parse {font-size: 80%; background: #eef;} +.parse {border-bottom:none;} +.stylehelp {font-size: 80%; background: #ffc; margin-bottom: 8pt;} +.filegroup {background: #efe; margin-bottom: 8pt;} +--> +</style> +</head> +<body> +<h1>LaTeX word count</h1> +'; +} + +sub html_tail { + print '</body></html>'; +} + +###### Help routines + + + +sub print_version { + print "TeXcount version ".$versionnumber.", ".$versiondate.'.'; +} + +sub print_syntax { + print ' +Syntax: TeXcount.pl [options] files + +Options: + -relaxed Uses relaxed rules for word and option handling: + i.e. allows more general cases to be counted as + either words or macros. + -v Verbose (same as -v3) + -v0 Do not present parsing details + -v1 Verbose: print parsed words, mark formulae + -v2 More verbose: also print ignored text + -v3 Even more verbose: include comments and options + -v4 Same as -v3 -showstate + -showstate Show internal states (with verbose) + -brief Only prints a brief, one line summary of counts + -q, -quiet Quiet mode, no error messages (use is discouraged!) + -sum, -sum= Make sum of all word and equation counts. May also + use -sum=#[,#] with up to 7 numbers to indicate how + each of the counts (text words, header words, caption + words, #headers, #floats, #inlined formulae, + #displayed formulae) are summed. The default sum (if + only -sum is used) is the same as -sum=1,1,1,0,0,1,1. + -sub, -sub= Generate subcounts. Option values are none, part, + chapter, section or subsection. Default (-sub) is set + to subsection, whereas unset is none. (Alternative + option name is -subcount.) + -nc, -nocol No colours (colours require ANSI) + -html Output in HTML format + -htmlcore Only HTML body contents + -inc Include tex files included in the document + -noinc Do not include included tex files (default) + -total Do not give sums per file, only total sum. + -1 Same as -brief and -total. Ensures there is only one + line of output. If used in conjunction with -sum, the + output will only be the total number. (NB: Character + is the number one, not the letter L.) + -dir, -dir= Specify the working directory using -dir=path. + Remember that the path must end with \ or /. If only + -dir is used, the directory of the parent file is used. + -utf8, -unicode Turns on Unicode (UTF-8) for input and output. This + is automatic with -chinese, and is required to handle + e.g. Korean text. Note that the TeX file must be save + in UTF-8 format (not e.g. GB2312 or Big5), or the + result will be unpredictable. + -ch, -chinese, -zhongwen Turns on support for Chinese characters. + TeXcount will then count each Chinese character as a + word. Automatically turns on -utf8. + -jp, -japanese Turns on support for Japanese characters. TeXcount + will count each Japanese character (kanji, hiragana, + and katakana) as one word, i.e. not do any form of + word segmentation. Automatically turns on -utf8. + -codes Display output style code overview and explanation. + This is on by default. + -nocodes Do not display output style code overview. + -h, -?, --help, /? Help + --version Print version number + --license License information +'; +} + +sub print_help { + print ' +*************************************************************** +* TeXcount.pl '.$versionnumber.', '.$versiondate.' +* + +Count words in TeX and LaTeX files, ignoring macros, tables, +formulae, etc. +'; + print_syntax(); + print ' +The script counts words as either words in the text, words in +headers/titles or words in floats (figure/table captions). +Macro options (i.e. \marco[...]) are ignored; macro parameters +(i.e. \macro{...}) are counted or ignored depending on the +macro, but by default counted. Begin-end groups are by default +ignored and treated as \'floats\', though some (e.g. center) are +counted. + +Unless -nocol (or -nc) has been specified, the output will be +colour coded. Counted text is coloured blue with headers are in +bold and in HTML output caption text is italicised. + +Mathematical formulae are not counted as words, but are instead +counted separately with separate counts for inlined formulae +and displayed formulae. Similarly, the number of headers and +the number of \'floats\' are counted. Note that \'float\' is used +here to describe anything defined in a begin-end group unless +explicitly recognized as text or mathematics. + +The verbose options (-v1, -v2, -v3, showstate) produces output +indicating how the text has been interpreted. Check this to +ensure that words in the text has been interpreted as such, +whereas mathematical formulae and text/non-text in begin-end +groups have been correctly interpreted. + +Parsing instructions may be passed to TeXcount using comments +in the LaTeX files on the format + %TC:instruction arguments +where valid instructions for setting parsing rules, typically +set at the start of the document (applies globally), are: + %TC:macro [macro] [param.states] + macro handling rule, no. of and rules for parameters + %TC:macroword [macro] [number] + macro counted as a given number of words + %TC:header [macro] [param.states] + header macro rule, as macro but counts as one header + %TC:breakmacro [macro] [label] + macro causing subcount break point + %TC:group [name] [parsing-state] + begin-end-group handling rule + %TC:floatinclude [macro] [param.states] + as macro, but also counted inside floats + %TC:preambleinclude [macro] [param.states] + as macro, but also counted inside the preamble + %TC:fileinclue [macro] [rule] + file include, add .tex if rule=2, not if rule=0 +The [param.states] is used to indicate the number of parameters +used by the macro and the rules of handling each of these: format +is [#,#,...,#] with one number for each parameter, and main rules +are 0 to ignore and 1 to count as text. Parsing instructions +which may be used anywhere are: + %TC:ignore start block to ignore + %TC:endignore end block to ignore + %TC:break [title] add subcount break point here +See the documentation for more details. + +Unix hint: Use \'less -r\' instead of just \'less\' to view output: +the \'-r\' option makes less treat text formating codes properly. + +Windows hint: If your Windows interprets ANSI colour codes, lucky +you! Otherwise, use the -nocol (or -nc) option with the verbose +options or the output will be riddled with colour codes. Instead, +you can use -html to produce HTML code, write this to file and +view with your favourite browser. +'; + print_reference(); +} + +sub print_reference { + print ' +The TeXcount script is copyright of Einar Andreas Rødland (2008) +and published under the LaTeX Project Public License. + +For more information about the script, e.g. news, updates, help, +usage tips, known issues and short-comings, go to + http://folk.uio.no/einarro/Comp/texwordcount.html +or go to + http://folk.uio.no/einarro/Services/texcount.html +to access the script as a web service. Feedback such as problems +or errors can be reported to einarro@ifi.uio.no. +'; +} + +sub print_license { + print 'TeXcount version '.$versionnumber.' + +Copyright 2008 Einar Andreas Rødland + +The TeXcount script is published under the LaTeX Project Public +License (LPPL) + http://www.latex-project.org/lppl.txt +which grants you, the user, the right to use, modify and distribute +the script. However, if the script is modified, you must change its +name or use other technical means to avoid confusion. + +The script has LPPL status "maintained" with Einar Andreas +Rødland being the current maintainer. +'; +} + |