aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xbin/autostart3
-rwxr-xr-x[-rw-r--r--]bin/backup-profiles53
-rwxr-xr-xbin/backup-settings19
-rwxr-xr-xbin/flac2mp319
-rwxr-xr-xbin/lowercase23
-rwxr-xr-xbin/nvsetup10
-rwxr-xr-xbin/rmcache31
-rwxr-xr-xbin/tex2pdf24
-rwxr-xr-xbin/texcount.pl1391
9 files changed, 1565 insertions, 8 deletions
diff --git a/bin/autostart b/bin/autostart
index 1fac332..8280ce3 100755
--- a/bin/autostart
+++ b/bin/autostart
@@ -1,4 +1,7 @@
#!/bin/bash
+xset -dpms
+xset s 3600 3600
+xset s off
# make RT check and don't start!
KERNEL_NAME=$(uname -r)
if [[ "$KERNEL_NAME" != *rt* ]]; then
diff --git a/bin/backup-profiles b/bin/backup-profiles
index 2f87701..66de7af 100644..100755
--- a/bin/backup-profiles
+++ b/bin/backup-profiles
@@ -1,10 +1,47 @@
-#!/bin/bash
+#!/bin/sh
-cd /tmp
-tar cvfz firefox.tgz $1/.mozilla/firefox/david.runge-backup
-gpg -e -r 'David Runge <david.runge@frqrec.com>' firefox.tgz
-mv -f firefox.tgz.gpg $1/Dropbox/sync
+firefoxPID=`ps -C firefox -o pid=`
+thunderbirdPID=`ps -C thunderbird -o pid=`
+
+firefoxTMP=/tmp/firefox.tgz
+thunderbirdTMP=/tmp/thunderbird.tgz
+firefoxIF=~/.mozilla/firefox/david.runge
+thunderbirdIF=~/.thunderbird/dvzrv
+OF=~/Dropbox/sync
+gpgEND=.gpg
+
+
+if [[ -z "$firefoxPID" ]]
+then
+ echo "Firefox is not running."
+ echo "Backing up profile."
+ tar cfPzh $firefoxTMP $firefoxIF
+ echo "Encrypting profile."
+ gpg -e -r 'David Runge <david.runge@frqrec.com>' $firefoxTMP
+ echo "Moving backup to Dropbox."
+ mv -f $firefoxTMP$gpgEND $OF
+ echo "Cleaning up."
+ rm $firefoxTMP
+ echo "Done."
+else
+ echo "Firefox is still running."
+ echo "Skipping backup."
+fi
+
+if [[ -z "$thunderbirdPID" ]]
+then
+ echo "Thunderbird is not running."
+ echo "Backing up profile."
+ tar cfPz $thunderbirdTMP $thunderbirdIF
+ echo "Encrypting profile."
+ gpg -e -r 'David Runge <david.runge@frqrec.com>' $thunderbirdTMP
+ echo "Moving backup to Dropbox."
+ mv -f $thunderbirdTMP$gpgEND $OF
+ echo "Cleaning up."
+ rm $thunderbirdTMP
+ echo "Done."
+else
+ echo "Thunderbird is still running."
+ echo "Skipping backup."
+fi
-tar cvfz thunderbird.tgz $1/.thunderbird/david.runge
-gpg -e -r 'David Runge <david.runge@frqrec.com>' thunderbird.tgz
-mv -f thunderbird.tgz.gpg $1/Dropbox/sync
diff --git a/bin/backup-settings b/bin/backup-settings
new file mode 100755
index 0000000..955a59e
--- /dev/null
+++ b/bin/backup-settings
@@ -0,0 +1,19 @@
+#! /bin/sh
+
+etcTMP=/tmp/etc.tgz
+etcIF=/etc
+OF=/home/dave/Dropbox/sync
+gpgEND=.gpg
+
+echo "Backing up /etc directory."
+tar cfPz $etcTMP $etcIF
+echo "Encrypting compressed directory."
+su - dave -c "gpg -e -r 'David Runge <david.runge@frqrec.com>' $etcTMP"
+echo "Changing permissions on file."
+chown dave:dave $etcTMP$gpgEND
+echo "Moving backup to Dropbox."
+mv -f $etcTMP$gpgEND $OF
+echo "Cleaning up."
+rm $etcTMP
+echo "Done."
+
diff --git a/bin/flac2mp3 b/bin/flac2mp3
new file mode 100755
index 0000000..9dd970b
--- /dev/null
+++ b/bin/flac2mp3
@@ -0,0 +1,19 @@
+find -name *.flac -print0 | while read -d $'\0' a
+
+
+do
+OUTF=`echo "$a" | sed s/\.flac$/.mp3/g`
+
+ARTIST=`metaflac "$a" --show-tag=ARTIST | sed s/.*=//g`
+TITLE=`metaflac "$a" --show-tag=TITLE | sed s/.*=//g`
+ALBUM=`metaflac "$a" --show-tag=ALBUM | sed s/.*=//g`
+GENRE=`metaflac "$a" --show-tag=GENRE | sed s/.*=//g`
+TRACKNUMBER=`metaflac "$a" --show-tag=TRACKNUMBER | sed s/.*=//g`
+DATE=`metaflac "$a" --show-tag=DATE | sed s/.*=//g`
+
+flac -c -d "$a" | lame -m j -q 0 --vbr-new -V 0 -s 44.1 - "$OUTF"
+id3 -t "$TITLE" -T "${TRACKNUMBER:-0}" -a "$ARTIST" -A "$ALBUM" -y "$DATE" -g "${GENRE:-12}" "$OUTF"
+
+done
+
+
diff --git a/bin/lowercase b/bin/lowercase
new file mode 100755
index 0000000..09273b0
--- /dev/null
+++ b/bin/lowercase
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# lowerext.sh
+
+while read f; do
+ if [[ "$f" = *.* ]]; then
+ # Extract the basename
+ b="${f%.*}"
+
+ # Extract the extension
+ x="${f##*.}"
+
+ # Convert the extension to lower case
+ # Note: this only works in recent versions of Bash
+ l="${x,,}"
+
+ if [[ "$x" != "$l" ]]; then
+ mv "$f" "$b.$l"
+ fi
+ else
+ continue
+ fi
+done
diff --git a/bin/nvsetup b/bin/nvsetup
new file mode 100755
index 0000000..23adeb5
--- /dev/null
+++ b/bin/nvsetup
@@ -0,0 +1,10 @@
+#!/bin/bash
+# When the nouveau driver is loaded and a screen is connected to DP-1,
+# make DP-1 the primary output and place LVDS-1 to its right.
+
+# Anchor the match so outputs such as eDP-1 or DP-10 are not picked up.
+SCREENCONNECTED=$(xrandr | grep '^DP-1 ')
+if lsmod | grep -q '^nouveau '; then
+  # "disconnected" contains "connected", so it is excluded explicitly.
+  if [[ "$SCREENCONNECTED" == *connected* && "$SCREENCONNECTED" != *disconnected* ]]; then
+    echo "Setup DP-1"
+#    sleep 5
+    xrandr --output DP-1 --auto --primary --output LVDS-1 --auto --right-of DP-1
+  fi
+fi
diff --git a/bin/rmcache b/bin/rmcache
new file mode 100755
index 0000000..463ec10
--- /dev/null
+++ b/bin/rmcache
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+USERDIR=$1
+
+echo "rmcache cleaning up: $USERDIR"
+
+caches=".cache/chromium"
+## take care of the whitespace!
+caches+=" .config/chromium/Default/*Index*"
+caches+=" .thumbnails"
+caches+=" .opera/cache4"
+caches+=" .opera/cache"
+caches+=" .java/deployment/cache"
+caches+=" .local/share/Trash"
+caches+=" .gnome2/epiphany/mozilla/epiphany/Cache"
+caches+=" .adobe/Acrobat/*/Cache"
+caches+=" .adobe/Acrobat/*/Temp"
+caches+=" .macromedia/Flash_Player/*"
+caches+=" .adobe/Flash_Player/AssetCache"
+caches+=" .java/deployment/cache"
+
+pushd $USERDIR > /dev/null
+
+for cache in $caches
+do
+ echo "removing $cache"
+ rm -Rf "$cache"
+ [ $? -eq 0 ] || exit 1
+done
+
+popd > /dev/null
diff --git a/bin/tex2pdf b/bin/tex2pdf
new file mode 100755
index 0000000..ad53029
--- /dev/null
+++ b/bin/tex2pdf
@@ -0,0 +1,24 @@
+#!/bin/bash
+echo "tex2pdf called with $1.tex">~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+echo "################## pdflatex, run #1 #################">>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log
+echo "bibtex:">>~/.tex2pdf.log
+bibtex $1.aux>>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+echo "################## pdflatex, run #2 #################">>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+echo "################## pdflatex, run #3 #################">>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+pdflatex -interaction=nonstopmode $1.tex>>~/.tex2pdf.log
+echo "#####################################################">>~/.tex2pdf.log
+echo "Launching evince with $1.pdf.">>~/.tex2pdf.log
+evince $1.pdf &
+echo "#####################################################">>~/.tex2pdf.log
+echo "Removing aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc.">>~/.tex2pdf.log
+echo "Current directory: $(pwd) and children of depth 1.">>~/.tex2pdf.log
+rm -f $1.{aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc}
+rm -f */*.{aux,bbl,blg,dvi,lof,log,lot,nav,out,snm,toc}
diff --git a/bin/texcount.pl b/bin/texcount.pl
new file mode 100755
index 0000000..1f47025
--- /dev/null
+++ b/bin/texcount.pl
@@ -0,0 +1,1391 @@
+#! /usr/bin/env perl
+use strict;
+use warnings;
+use Term::ANSIColor;
+use Encode;
+use POSIX qw(locale_h);
+use locale;
+setlocale(LC_CTYPE,"no_NO");
+
+my $versionnumber="2.2";
+my $versiondate="2009 Apr 30";
+
+###### Set CMD specific settings and variables
+
+# Options and states
+# Verbosity level: -1 (-q, -1), 0..3 (-v0 .. -v3); also used in Apply_Options
+# as the index into @STYLES.
+my $verbose=0;
+# 1 by default, 2 with -codes, 0 with -nocodes.
+my $showcodes=1;
+# Set to 1 by -showstates and -v4; parse_unit then records the parser state.
+my $showstates=0;
+# Set to 1 by option_subcount (-sub).
+my $showsubcounts=0;
+# 0 = no HTML, 1 = -htmlcore, 2 = -html (Apply_Options calls html_head when >1).
+my $htmlstyle=0;
+# -inc / -noinc: when true, parse_include_file queues included files.
+my $includeTeX=0;
+# Set by -brief and -1.
+my $briefsum=0;
+# Weights given with -sum; see option_sum.
+my @sumweights;
+# Set by -utf8/-unicode and by the -chinese/-japanese options.
+my $utf8flag=0;
+
+# Global variables
+my $blankline=0;
+my $errorcount=0;
+
+# CMD specific global variables
+# Set by -total and -1.
+my $totalflag=0;
+# Queue of files for the top-level file being parsed (see parse_file and
+# add_file_to_list).
+my @filelist;
+# Directory prefix prepended to included file names in add_file_to_list.
+my $workdir;
+# "" by default; -dir=PATH sets it to PATH; -dir sets it to undef, in which
+# case parse_file derives $workdir from the path of the top-level file.
+my $globalworkdir="";
+
+###### Set global settings and variables
+
+### Macros for headers
+# Macros that identify headers: i.e. following token or
+# {...} is counted as header. The =>[2] indicates transition to
+# state 2 which is used within headers (although the value is
+# actually never used). This is copied to %TeXmacro and the
+# only role of defining it here is that the counter for the number
+# of headers is incremented by one.
+my %TeXheader=('\title'=>[2],'\part'=>[2],'\chapter'=>[2],
+ '\section'=>[2],'\subsection'=>[2],'\subsubsection'=>[2],
+ '\paragraph'=>[2],'\subparagraph'=>[2]);
+
+### How many tokens to gobble after macro
+# Each macro is assumed to gobble up a given number of
+# tokens (or {...} groups), as well as options [...] before, within
+# and after. The %TeXmacro hash gives a link from a macro
+# (or beginNAME for begin-end groups without the backslash)
+# to either an integer giving the number of tokens to ignore
+# or to an array (specified as [num,num,...]) of length N where
+# N is the number of tokens to be read with the macro and the
+# array values tell how each is to be interpreted (see the status
+# values: 0=ignore, 1=count, etc.). Thus specifying a number N is
+# equivalent to specifying an array [0,...,0] of N zeros.
+#
+# For macros not specified here, the default value is 0: i.e.
+# no tokens are excluded, but [...] options are. Header macros
+# specified in %TeXheader are automatically included here.
+# Keys for macros include the leading backslash; keys of the form beginNAME
+# describe begin-end groups. '\renewenvironment' previously lacked its
+# backslash and therefore never matched a macro token.
+my %TeXmacro=(%TeXheader,
+ '\documentclass'=>1,'\documentstyle'=>1,'\usepackage'=>1, '\hyphenation'=>1,
+ '\pagestyle'=>1,'\thispagestyle'=>1, '\pagenumbering'=>1,'\markboth'=>1, '\markright'=>1,
+ '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3],
+ '\newenvironment'=>[-3,-3,-3], '\renewenvironment'=>[-3,-3,-3],
+ '\newfont'=>2,'\newtheorem'=>2,'\bibliographystyle'=>1, '\bibliography'=>1,
+ '\parbox'=>1, '\marginpar'=>[3],'\makebox'=>0, '\raisebox'=>1, '\framebox'=>0,
+ '\newsavebox'=>1, '\sbox'=>1, '\savebox'=>2, '\usebox'=>1,'\rule'=>2,
+ '\footnote'=>[3],'\label'=>1, '\ref'=>1, '\pageref'=>1, '\bibitem'=>1,
+ '\cite'=>1, '\citep'=>1, '\citet'=>1, '\citeauthor'=>1, '\citealt'=>1, '\nocite'=>1,
+ '\eqlabel'=>1, '\eqref'=>1,'\hspace'=>1, '\vspace'=>1, '\addvspace'=>1,
+ '\input'=>1, '\include'=>1, '\includeonly'=>1,'\includegraphics'=>1,
+ '\newlength'=>1, '\setlength'=>2, '\addtolength'=>2,'\settodepth'=>2,
+ '\settoheight'=>2, '\settowidth'=>2,'\newcounter'=>1, '\setcounter'=>2,
+ '\addtocounter'=>2,'\stepcounter'=>1, '\refstepcounter'=>1, '\usecounter'=>1,
+ '\alph'=>1, '\arabic'=>1, '\fnsymbol'=>1, '\roman'=>1, '\value'=>1,
+ '\cline'=>1, '\multicolumn'=>3,'\typeout'=>1, '\typein'=>1,
+ 'beginlist'=>2, 'beginminipage'=>1, 'begintabular'=>1,
+ 'beginthebibliography'=>1,'beginlrbox'=>1,
+ '\begin'=>1,'\end'=>1,'\title'=>[2]);
+
+### Macros that should be counted as one or more words
+# Macros that represent text may be declared here. The value gives
+# the number of words the macro represents.
+my %TeXmacroword=('\LaTeX'=>1,'\TeX'=>1);
+
+### Macros that are counted within the preamble
+# The preamble is the text between \documentclass and \begin{document}.
+# Text and macros in the preamble are ignored unless specified here. The
+# value is the status (1=text, 2=header, etc.) they should be interpreted as.
+# Note that only the first unit (token or {...} block) is counted.
+# Keys include the leading backslash of the macro ('\renewenvironment'
+# previously lacked it and so never matched).
+my %TeXpreamble=('\title'=>[2],
+ '\newcommand'=>[-3,-3],'\renewcommand'=>[-3,-3],
+ '\newenvironment'=>[-3,-3,-3], '\renewenvironment'=>[-3,-3,-3],
+ );
+
+### Begin-End groups
+# Identified as begin-end groups, and define =>state. The
+# states used corresponds to the elements of the count array, and
+# are:
+# 0: Not included
+# 1: Text, word included in text count
+# 2: Header, words included in header count
+# 3: Float caption, words included in float caption count
+# 6: Inline mathematics, words not counted
+# 7: Displayed mathematics, words not counted
+# -1: Float, not included, but looks for captions
+#
+# 4 and 5 are used to count number of headers and floats
+# and are not used as states.
+#
+# Groups that are not defined will be counted as the surrounding text.
+#
+# Note that some environments may only exist within math-mode, and
+# therefore need not be defined here: in fact, they should not as it
+# is not clear if they will be in inlined or displayed math.
+#
+my %TeXgroup=('document'=>1,'letter'=>1,'titlepage'=>0,
+ 'center'=>1,'flushleft'=>1,'flushright'=>1,
+ 'abstract'=>1,'quote'=>1,'quotation'=>1,'verse'=>1,'minipage'=>1,'verbatim'=>1,
+ 'description'=>1,'enumerate'=>1,'itemize'=>1,'list'=>1,
+ 'theorem'=>1,'lemma'=>1,'definition'=>1,'corollary'=>1,'example'=>1,
+ 'math'=>6,'displaymath'=>7,'equation'=>7,'eqnarray'=>7,'align'=>7,
+ 'figure'=>-1,'float'=>-1,'picture'=>-1,'table'=>-1,
+ 'tabbing'=>0,'tabular'=>0,'thebibliography'=>0,'lrbox'=>0);
+
+### In floats: include only specific macros
+# Macros used to identify caption text within floats.
+my %TeXfloatinc=('\caption'=>[3]);
+
+### Macros for including tex files
+# Allows \macro{file} or \macro file. If the value is 0, the filename will
+# be used as is; if it is 1, the filetype .tex will be added if the
+# filename is without filetype; if it is 2, the filetype .tex will be added.
+my %TeXfileinclude=('\input'=>1,'\include'=>2);
+
+### Count labels
+# Labels used to describe the counts
+my @countlabel=('Files','Words in text','Words in headers',
+ 'Words in float captions','Number of headers','Number of floats',
+ 'Number of math inlines','Number of math displayed');
+
+### Break points
+# Definition of macros that define break points that start a new subcount.
+# The values given are used as labels.
+my %BreakPointsOptions;
+$BreakPointsOptions{'none'}={};
+$BreakPointsOptions{'part'}={%{$BreakPointsOptions{'none'}},'\part'=>'Part'};
+$BreakPointsOptions{'chapter'}={%{$BreakPointsOptions{'part'}},'\chapter'=>'Chapter'};
+$BreakPointsOptions{'section'}={%{$BreakPointsOptions{'chapter'}},'\section'=>'Section'};
+$BreakPointsOptions{'subsection'}={%{$BreakPointsOptions{'section'}},'\subsection'=>'Subsection'};
+$BreakPointsOptions{'default'}=$BreakPointsOptions{'subsection'};
+my %BreakPoints=%{$BreakPointsOptions{'none'}};
+
+### Print styles
+# Definition of different print styles: maps of class labels
+# to ANSI codes. Class labels are as used by HTML styles.
+my @STYLES=();
+my %STYLE;
+$STYLES[0]={'error'=>'bold red'};
+$STYLES[1]={%{$STYLES[0]},
+ 'word1'=>'blue','word2'=>'bold blue','word3'=>'blue',
+ 'grouping'=>'red','document'=>'red','mathgroup'=>'magenta',
+ 'state'=>'cyan underline','sumcount'=>'yellow'};
+$STYLES[2]={%{$STYLES[1]},
+ 'command'=>'green','exclcommand'=>'yellow','exclgroup'=>'yellow','exclmath'=>'yellow',
+ 'ignore'=>'cyan'};
+$STYLES[3]={%{$STYLES[2]},
+ 'tc'=>'bold yellow','comment'=>'yellow','option'=>'yellow',
+ 'fileinclude'=>'bold green'};
+$STYLES[4]={%{$STYLES[3]}};
+
+### Word regexp pattern list
+# List of regexp patterns that should be analysed as words.
+# Use @ to represent a letter, will be substituted with $LetterPattern.
+my @WordPatterns=('(@+\.)+@+\.?','@+([\-\']@+)*');
+my $specialchars='\\\\(ae|AE|o|O|aa|AA)';
+my $modifiedchars='\\\\[\'\"\`\~\^\=](\w|\{\w\})';
+my $LetterPattern='\w';
+my $LetterPatternRelaxed='([\w\-\']|'.$modifiedchars.'|'.$specialchars.'(\{\})?|\{'.$specialchars.'\}|\{\w\})';
+my %NamedWordPattern;
+$NamedWordPattern{'chinese'}='\p{script=Han}';
+$NamedWordPattern{'japanese'}='(\p{script=Han}|\p{script=Hiragana}|\p{script=Katakana})';
+
+### Macro option regexp list
+# List of regexp patterns to be gobbled as macro option in and after
+# a macro.
+my @MacroOptionPatterns=('\[(\w|[,\-\s\~\.\:\;\+\?\*\_\=])*\]');
+my @MacroOptionPatternsRelaxed=('\[[^\[\]\n]*\]');
+
+###### Main script
+
+
+###################################################
+
+MAIN(@ARGV);
+
+###################################################
+
+
+#########
+######### Main routines
+#########
+
+# MAIN ROUTINE: parse the command line, apply the options, then either
+# report that no files were given or parse the files and print the total.
+sub MAIN {
+  my @toplevelfiles=Parse_Arguments(@_);
+  Apply_Options();
+  if (@toplevelfiles) {
+    conditional_print_help_style();
+    my $totalcount=parse_file_list(@toplevelfiles);
+    conditional_print_total($totalcount);
+  } else {
+    # Only complain when no help/style output was produced instead.
+    unless (conditional_print_help_style()) {
+      print_error("No files specified.","p","error");
+    }
+  }
+  Report_ErrorCount();
+  Close_Output();
+}
+
+# Inspect the argument list for conditions that end the program at once:
+# no arguments at all, or a help/version/license request as first argument.
+# Returns 1 when normal processing should continue.
+sub Check_Arguments {
+  if (scalar(@_)==0) {
+    print_version();
+    print_syntax();
+    print_reference();
+    exit;
+  }
+  my $first=$_[0];
+  if ($first=~/^(\-?\-(h|\?|help)|\/(\?|h))$/) {
+    print_help();
+    exit;
+  }
+  if ($first=~/^\-?\-(ver|version)$/) {
+    print_version();
+    exit;
+  }
+  if ($first=~/^\-?\-(lic|license)$/) {
+    print_license();
+    exit;
+  }
+  return 1;
+}
+
+# Parses arguments, sets options (global) and returns file list.
+# Every argument is first offered to Parse_Option; anything else starting
+# with '-' is an error (prints the syntax and exits). Remaining arguments
+# are file names, with backslashes converted to forward slashes.
+sub Parse_Arguments {
+  my @args=@_;
+  Check_Arguments(@args);
+  my @files;
+  # Iterate over the arguments passed in, not the global @ARGV.
+  foreach my $arg (@args) {
+    if (Parse_Option($arg)) {next;}
+    if ($arg=~/^\-/) {
+      print 'Invalid option '.$arg."\n";
+      print_syntax();
+      exit;
+    }
+    $arg=~s/\\/\//g;
+    push @files,$arg;
+  }
+  return @files;
+}
+
+# Offer one command line argument to each option parser in turn and
+# return the first true result; a false value means "not an option".
+sub Parse_Option {
+  my ($arg)=@_;
+  my @parsers=(
+    \&parse_options_parsing,
+    \&parse_options_sums,
+    \&parse_options_output,
+    \&parse_options_format,
+  );
+  foreach my $parser (@parsers) {
+    my $handled=$parser->($arg);
+    if ($handled) {return $handled;}
+  }
+  return 0;
+}
+
+# Handle options that control how files are parsed (-inc, -dir, -utf8,
+# -chinese, -japanese, -relaxed). Returns 1 if $arg was one of them, else 0.
+sub parse_options_parsing {
+  my ($arg)=@_;
+  if ($arg eq '-inc') {$includeTeX=1; return 1;}
+  if ($arg eq '-noinc') {$includeTeX=0; return 1;}
+  if ($arg eq '-dir') {$globalworkdir=undef; return 1;}
+  if ($arg=~/^-dir=(.*)$/) {$globalworkdir=$1; return 1;}
+  if ($arg=~/^-(utf8|unicode)$/) {$utf8flag=1; return 1;}
+  if ($arg=~/^-(ch|chinese|zhongwen)$/) {
+    # The language pattern goes first so it is tried before the defaults.
+    $utf8flag=1;
+    unshift @WordPatterns,$NamedWordPattern{'chinese'};
+    return 1;
+  }
+  if ($arg=~/^-(jp|japanese)$/) {
+    $utf8flag=1;
+    unshift @WordPatterns,$NamedWordPattern{'japanese'};
+    return 1;
+  }
+  if ($arg eq '-relaxed') {
+    @MacroOptionPatterns=@MacroOptionPatternsRelaxed;
+    $LetterPattern=$LetterPatternRelaxed;
+    return 1;
+  }
+  return 0;
+}
+
+# Handle -sum[=...] and -sub/-subcount[s][=...]. Returns 1 if $arg was one
+# of them, else 0.
+sub parse_options_sums {
+  my ($arg)=@_;
+  if ($arg=~/^-sum(=(.+))?$/) {
+    option_sum($2);
+    return 1;
+  }
+  if ($arg=~/^-(sub|subcounts?)(=(.+))?$/) {
+    option_subcount($3);
+    return 1;
+  }
+  return 0;
+}
+
+# Enable subcounts and select the break point set named by $arg.
+# Without a value, or with an unknown one (after a warning on STDERR),
+# the 'default' set is used.
+sub option_subcount {
+  my ($arg)=@_;
+  $showsubcounts=1;
+  my $selected=$BreakPointsOptions{'default'};
+  if (defined $arg) {
+    if (my $option=$BreakPointsOptions{$arg}) {
+      $selected=$option;
+    } else {
+      print STDERR "Warning: Option value ".$arg." not valid, using default instead.\n";
+    }
+  }
+  %BreakPoints=%{$selected};
+}
+
+# Set @sumweights from the value of -sum: a list of one to seven
+# comma-separated integers, or the default weights when no value is given.
+# An invalid value leaves @sumweights untouched and warns on STDERR.
+sub option_sum {
+  my ($arg)=@_;
+  if (defined $arg) {
+    if ($arg=~/^(\d+(,\d+){0,6})$/) {
+      @sumweights=split(',',$1);
+    } else {
+      print STDERR "Warning: Option value ".$arg." not valid, ignoring option.\n";
+    }
+  } else {
+    @sumweights=(1,1,1,0,0,1,1);
+  }
+}
+
+# Handle options that select the output format (-brief, -total, -1, -html,
+# -htmlcore, -nocol/-nc, -codes, -nocodes).
+# Returns 1 if $arg was one of them, else 0.
+sub parse_options_format {
+ my $arg=shift @_;
+ if ($arg eq '-brief') {$briefsum=1;}
+ elsif ($arg eq '-total') {$totalflag=1;}
+ # -1: brief total only, with quiet verbosity
+ elsif ($arg eq '-1') {$briefsum=1;$totalflag=1;$verbose=-1;}
+ # Both HTML modes disable ANSI colours; 2 = -html, 1 = -htmlcore.
+ elsif ($arg eq "-html" ) {option_no_colours();$htmlstyle = 2;}
+ elsif ($arg eq "-htmlcore" ) {option_no_colours();$htmlstyle = 1;}
+ # NOTE(review): the '$' only anchors the 'nc' alternative, so any option
+ # starting with -nocol (e.g. -nocolour) is accepted - confirm this is intended.
+ elsif ($arg=~/^\-(nocol|nc$)/) {option_no_colours();}
+ elsif ($arg eq '-codes') {
+ $showcodes=2;
+ # Raise the verbosity to 3 only if it is still 0 at this point.
+ if ($verbose==0) {$verbose=3;}
+ }
+ elsif ($arg eq '-nocodes') {$showcodes=0;}
+ else {return 0;}
+ return 1;
+}
+
+# Handle the verbosity options (-v0 .. -v4, -v, -vv, -vvv, -vvvv,
+# -showstate[s], -q/-quiet/--quiet). Returns 1 if $arg was one of them, else 0.
+sub parse_options_output {
+  my ($arg)=@_;
+  if ($arg eq "-v0") {$verbose=0; return 1;}
+  if ($arg eq "-v1") {$verbose=1; return 1;}
+  if ($arg eq '-vv' || $arg eq '-v2') {$verbose=2; return 1;}
+  if ($arg eq '-vvv' || $arg eq '-v3' || $arg eq '-v') {$verbose=3; return 1;}
+  if ($arg eq '-vvvv' || $arg eq '-v4') {
+    # Level 4 is level 3 plus the parser state output.
+    $verbose=3;
+    $showstates=1;
+    return 1;
+  }
+  if ($arg =~ /^\-showstates?$/ ) {$showstates=1; return 1;}
+  if ($arg =~ /^-(q|-?quiet)$/ ) {$verbose=-1; return 1;}
+  return 0;
+}
+
+# Parse every file in the list and return the accumulated total count.
+sub parse_file_list {
+  my @files=@_;
+  my $listtotalcount=new_count("TOTAL COUNT");
+  # <@files> interpolates the names and passes them through glob.
+  foreach my $file (<@files>) {
+    add_count($listtotalcount,parse_file($file));
+  }
+  return $listtotalcount;
+}
+
+# Parse file and included files, and return total count
+# $file is the top-level file. @filelist is reset to that file and may be
+# extended by add_file_to_list while files are parsed. Each file that can
+# be read is parsed and its count added to the returned sum; unreadable
+# files are reported on STDERR and through formatprint.
+sub parse_file {
+ my $file=shift @_;
+ $workdir=$globalworkdir;
+ if (!defined $workdir) {
+ # -dir without a value: use the directory part of the top-level file.
+ $workdir=$file;
+ $workdir =~ s/^((.*[\\\/])?)[^\\\/]+$/$1/;
+ }
+ @filelist=($file);
+ if ($htmlstyle) {print "\n<div class='filegroup'>\n";}
+ my $filetotalcount=new_count("SUM COUNT FOR ".$file);
+ # NOTE(review): presumably relies on foreach visiting elements pushed onto
+ # @filelist during the loop, so queued include files get parsed - confirm.
+ foreach my $f (@filelist) {
+ my $tex=TeXfile($f);
+ # $fpath (directory part of $f) is computed but not used below.
+ my $fpath=$f;
+ $fpath=~s/^((.*[\\\/])?)[^\\\/]+$/$1/;
+ if (!defined $tex) {
+ print STDERR "File not found or not readable: ".$f."\n";
+ formatprint("File not found or not readable: ".$f."\n","p","error");
+ } else {
+ parse($tex);
+ my $filecount=add_subcount($tex);
+ # Per-file counts are printed unless only the total was requested.
+ if (!$totalflag) {
+ print_count($filecount);
+ print "\n";
+ }
+ add_count($filetotalcount,$filecount);
+ }
+ }
+ if ($htmlstyle) {print "</div>\n\n";}
+ return $filetotalcount;
+}
+
+
+######
+###### Subroutines
+######
+
+###### CMD specific implementations
+
+
+# Queue an included file for parsing; the name is prefixed with $workdir.
+sub add_file_to_list {
+  my ($fname)=@_;
+  my $path=$workdir.$fname;
+  push @filelist,$path;
+}
+
+# Print $text either wrapped in an HTML span of class $style (HTML output)
+# or coloured with the ANSI colour specification $colour.
+sub print_with_style {
+  my ($text,$style,$colour)=@_;
+  #if ($utf8flag || $htmlstyle) {utf8::encode($text);}
+  unless ($htmlstyle) {
+    print Term::ANSIColor::colored($text,$colour);
+  } else {
+    print "<span class='".$style."'>".$text."</span>";
+  }
+}
+
+# Turn off coloured output by setting the ANSI_COLORS_DISABLED environment
+# variable.
+sub option_no_colours {
+  $ENV{ANSI_COLORS_DISABLED}=1;
+}
+
+# Print the total count when -total was given or more than one file was
+# counted. With -total, -brief and sum weights only the weighted sum is
+# printed; otherwise the full count (with a heading in HTML mode).
+sub conditional_print_total {
+  my ($sumcount)=@_;
+  unless ($totalflag || get_count($sumcount,0)>1) {return;}
+  if ($totalflag && $briefsum && @sumweights) {
+    print total_count($sumcount);
+    return;
+  }
+  formatprint("Total word count",'h2') if ($htmlstyle);
+  print_count($sumcount);
+}
+
+###### Option handling
+
+
+# Turn the parsed options into settings: pick the print style for the
+# verbosity level, switch STDOUT to UTF-8 if requested, emit the HTML head
+# for full HTML output and expand '@' in the word patterns.
+sub Apply_Options {
+  my $style=$STYLES[$verbose];
+  %STYLE=%{$style};
+  binmode STDOUT,':utf8' if ($utf8flag);
+  html_head() if ($htmlstyle>1);
+  # '@' is the placeholder for a letter in @WordPatterns.
+  s/\@/$LetterPattern/g foreach (@WordPatterns);
+}
+
+
+###### TeX code handle
+
+
+# Read $filename and wrap its contents in a TeXcode handle.
+# Returns undef only when the file could not be read; an empty file (or
+# one whose whole content is "0") is valid input and yields a handle.
+sub TeXfile {
+  my $filename=shift @_;
+  my $file=read_file($filename);
+  # Test definedness, not truth: '' and '0' are legitimate file contents.
+  if (!defined $file) {return undef;}
+  return TeXcode($file,$filename);
+}
+
+# Read the whole of $filename and return it as one string, or undef if the
+# file cannot be opened. With the UTF-8 option the file is decoded as UTF-8
+# and a leading byte order mark is removed.
+sub read_file {
+  my $filename=shift @_;
+  # Three-argument open with a lexical handle: the file name is never
+  # interpreted as a mode or pipe, and the handle is private to this call.
+  my $mode=$utf8flag ? '<:utf8' : '<';
+  open(my $fh,$mode,$filename) || return undef;
+  if ($verbose>0) {
+    formatprint("File: ".$filename."\n",'h2');
+    $blankline=0;
+  }
+  my @text=<$fh>;
+  close($fh);
+  my $latexcode=join('',@text);
+  if ($utf8flag) {
+    $latexcode =~ s/^\x{feff}//;
+  }
+  return $latexcode;
+}
+
+###### Parsing routines
+
+
+# Build the TeXcode handle (a hash reference) for a piece of LaTeX code.
+# $filename and $title are optional; the title defaults to "FILE: <name>"
+# or, without a file name, to "Word count".
+sub TeXcode {
+  my ($texcode,$filename,$title)=@_;
+  # Directory part of the file name, including the trailing separator.
+  my $filepath='';
+  if (defined $filename && $filename=~/^(.*[\\\/])[^\\\/]+$/) {
+    $filepath=$1;
+  }
+  if (!defined $title) {
+    $title=(defined $filename) ? "FILE: ".$filename : "Word count";
+  }
+  my %TeX=(
+    'filename'=>$filename,
+    'filepath'=>$filepath,
+    'line'=>$texcode,
+    'next'=>undef,
+    'type'=>undef,
+    'style'=>undef,
+    'printstate'=>undef,
+    'eof'=>0,
+  );
+  my $countsum=new_count($title);
+  $TeX{'countsum'}=$countsum;
+  $TeX{'count'}=new_count("_top_");
+  inc_count(\%TeX,0);
+  # The same list is shared by the handle and by the sum count.
+  my @countlist=();
+  $TeX{'countlist'}=\@countlist;
+  $countsum->{'subcounts'}=\@countlist;
+  return \%TeX;
+}
+
+# Parse a whole LaTeX document: process units with text status until the
+# handle reports end of file. In verbose HTML mode the output is wrapped
+# in a paragraph of class "parse".
+sub parse {
+  my $tex=shift @_;
+  print "<p class=parse>\n" if ($htmlstyle && $verbose);
+  until ($tex->{'eof'}) {
+    parse_unit($tex,1);
+  }
+  print "</p>\n" if ($htmlstyle && $verbose);
+}
+
+# Parse one block or unit
+# Arguments: $tex (TeXcode handle), $status (parser status, see below) and
+# an optional $end token. With $end, tokens are processed until a token
+# equal to $end is read; without it, the sub returns after one token (and
+# whatever that token caused to be parsed).
+sub parse_unit {
+ # Status:
+ # 0 = exclude from count
+ # 1 = text
+ # 2 = header text
+ # 3 = float text
+ # -1 = float (exclude)
+ # -2 = strong exclude, ignore begin-end groups
+ # -3 = stronger exclude, do not parse macro parameters
+ # -9 = preamble (between \documentclass and \begin{document})
+ my ($tex,$status,$end)=@_;
+ if (!defined $status) {
+ print_error("CRITICAL ERROR: Undefined parser status!");
+ exit;
+ } elsif (ref($status) eq 'ARRAY') {
+ print_error("CRITICAL ERROR: Invalid parser status!");
+ exit;
+ }
+ my $substat;
+ if ($showstates) {
+ # Record the state as :status: or :status:end: for the state output.
+ if (defined $end) {
+ $tex->{'printstate'}=':'.$status.':'.$end.':';
+ } else {
+ $tex->{'printstate'}=':'.$status.':';
+ }
+ flush_next($tex);
+ }
+ while (defined (my $next=next_token($tex))) {
+ # parse next token; or tokens until match with $end
+ set_style($tex,"ignore");
+ if ((defined $end) && ($end eq $next)) {
+ # end of unit
+ return;
+ } elsif (!defined $next) {
+ # NOTE(review): the loop condition requires $next to be defined, so this
+ # branch looks unreachable and the end-of-file error is never reported here.
+ print_error("ERROR: End of file while waiting for ".$end);
+ return;
+ }
+ if ($status==-9 && $next eq '\begin' && $tex->{'line'}=~/^\{\s*document\s*\}/) {
+ # \begin{document}
+ $status=1;
+ }
+ if ($next eq '\documentclass') {
+ # starts preamble
+ set_style($tex,'document');
+ gobble_option($tex);
+ gobble_macro_parms($tex,1);
+ # The rest of the file is parsed from here with preamble status.
+ while (!($tex->{'eof'})) {
+ parse_unit($tex,-9);
+ }
+ } elsif ($tex->{'type'}==666) {
+ # parse TC instructions
+ parse_tc($tex);
+ } elsif ($tex->{'type'}==1) {
+ # word
+ if ($status>0) {
+ inc_count($tex,$status);
+ set_style($tex,'word'.$status);
+ }
+ } elsif ($next eq '{') {
+ # {...}
+ parse_unit($tex,$status,'}');
+ } elsif ($tex->{'type'}==3 && $status==-3) {
+ # macro seen under status -3: styled as ignored, parameters not parsed
+ set_style($tex,'ignore');
+ } elsif ($tex->{'type'}==3) {
+ # macro call
+ parse_macro($tex,$next,$status,$substat);
+ } elsif ($next eq '$') {
+ # math inline
+ parse_math($tex,$status,6,'$');
+ } elsif ($next eq '$$') {
+ # math display (unless already in inlined math)
+ if (!(defined $end && $end eq '$')) {
+ parse_math($tex,$status,7,'$$');
+ }
+ }
+ # Without an end token only a single unit is parsed.
+ if (!defined $end) {return;}
+ }
+}
+
+# Handle the macro token $next read under parser status $status.
+# Starts a new subcount if the macro is a break point, then dispatches on
+# the macro and status; the order of the tests below decides which rule
+# applies. $substat is used as a scratch variable.
+sub parse_macro {
+ my ($tex,$next,$status,$substat)=@_;
+ if (my $label=$BreakPoints{$next}) {
+ # Extend the label with the text of the first {...} after the macro, if any.
+ if ($tex->{'line'}=~ /^[*]?(\s*\[.*?\])*\s*\{(.+?)\}/ ) {
+ $label=$label.': '.$2;
+ }
+ add_subcount($tex,$label);
+ }
+ set_style($tex,$status>0?'command':'exclcommand');
+ if ($next eq '\begin' && $status!=-2) {
+ parse_begin_end($tex,$status);
+ } elsif (($status==-1) && ($substat=$TeXfloatinc{$next})) {
+ # text included from float
+ set_style($tex,'command');
+ gobble_macro_parms($tex,$substat);
+ } elsif ($status==-9 && defined ($substat=$TeXpreamble{$next})) {
+ # parse preamble include macros
+ set_style($tex,'command');
+ if (defined $TeXheader{$next}) {inc_count($tex,4);}
+ gobble_macro_parms($tex,$substat,1);
+ } elsif ($status<0) {
+ # ignore
+ gobble_option($tex);
+ } elsif ($next eq '\(') {
+ # math inline
+ parse_math($tex,$status,6,'\)');
+ } elsif ($next eq '\[') {
+ # math display
+ parse_math($tex,$status,7,'\]');
+ } elsif ($next eq '\def') {
+ # ignore \def...
+ # Drop everything up to the first '{', then parse that group as excluded.
+ $tex->{'line'} =~ s/^([^\{]*)\{/\{/;
+ flush_next($tex);
+ print_style($1.' ','ignore');
+ parse_unit($tex,-2);
+ } elsif (defined (my $addsuffix=$TeXfileinclude{$next})) {
+ # include file: queue up for parsing
+ parse_include_file($tex,$status,$addsuffix);
+ } elsif (defined ($substat=$TeXmacro{$next})) {
+ # macro: exclude options
+ # Count index 4 is the number of headers.
+ if (defined $TeXheader{$next}) {inc_count($tex,4);}
+ gobble_macro_parms($tex,$substat,$status);
+ } elsif (defined ($substat=$TeXmacroword{$next})) {
+ # count macro as word (or a given number of words)
+ inc_count($tex,$status,$substat);
+ set_style($tex,'word'.$status);
+ } elsif ($next =~ /^\\[^\w\_]/) {
+ # backslash followed by a non-word character: nothing to gobble
+ } else {
+ gobble_option($tex);
+ }
+}
+
+# Interpret a %TC: instruction held in $tex->{'next'}.
+# 'break' starts a new subcount; instructions of the form
+# "%TC:instruction macro parameter [number]" update the macro tables;
+# 'ignore' skips input up to %TC:endignore. Anything else is reported.
+sub parse_tc {
+ my ($tex)=@_;
+ my $next=$tex->{'next'};
+ set_style($tex,'tc');
+ flush_next($tex);
+ # Strip the "%TC:instruction" prefix; the rest of the line stays in $next.
+ if (!($next=~s/^\%+TC:\s*(\w+)\s*// )) {
+ print_error('Warning: TC command should have format %TC:instruction [macro] [parameters]');
+ return;
+ };
+ my $instr=$1;
+ if ($instr=~/^(break)$/) {
+ # The remainder of the line is used as the label of the subcount.
+ if ($instr eq 'break') {add_subcount($tex,$next);}
+ } elsif ($next=~/^([\\]*\w+)\s+([^\s\n]+)(\s+([0-9]+))?/) {
+ # Format = TC:word macro
+ my $macro=$1;
+ my $param=$2;
+ my $option=$4;
+ # A parameter written as [n,n,...] becomes an array reference.
+ if ($param=~/^\[([0-9,]+)\]$/) {$param=[split(',',$1)];}
+ if (($instr eq 'macro') || ($instr eq 'exclude')) {$TeXmacro{$macro}=$param;}
+ elsif ($instr eq 'header') {$TeXheader{$macro}=$param;$TeXmacro{$macro}=$param;}
+ elsif ($instr eq 'macroword') {$TeXmacroword{$macro}=$param;}
+ elsif ($instr eq 'preambleinclude') {$TeXpreamble{$macro}=$param;}
+ elsif ($instr eq 'group') {
+ # Groups store the parameter rule under beginNAME and the state under NAME.
+ $TeXmacro{'begin'.$macro}=$param;
+ $TeXgroup{$macro}=$option;
+ }
+ elsif ($instr eq 'floatinclude') {$TeXfloatinc{$macro}=$param;}
+ elsif ($instr eq 'fileinclude') {$TeXfileinclude{$macro}=$param;}
+ elsif ($instr eq 'breakmacro') {$BreakPoints{$macro}=$param;}
+ else {print_error("Warning: Unknown TC command: ".$instr);}
+ } elsif ($instr eq 'ignore') {
+ tc_ignore_input($tex);
+ } else {
+ print_error("Warning: Invalid TC command format: ".$instr);
+ }
+}
+
+# Skip input after %TC:ignore: parse with status -3 until the
+# %TC:endignore token, then restore the 'tc' style and flush.
+sub tc_ignore_input {
+  my $tex=shift @_;
+  set_style($tex,'ignore');
+  parse_unit($tex,-3,"%TC:endignore");
+  set_style($tex,'tc');
+  flush_next($tex);
+}
+
+# Parse a math section up to the token $end with its contents excluded
+# from the word count. When the surrounding status is counted (>0), the
+# counter with index $substat is incremented.
+sub parse_math {
+  my ($tex,$status,$substat,$end)=@_;
+  my $counted=($status>0);
+  my $localstyle='exclmath';
+  if ($counted) {
+    $localstyle='mathgroup';
+    inc_count($tex,$substat);
+  }
+  set_style($tex,$localstyle);
+  parse_unit($tex,0,$end);
+  set_style($tex,$localstyle);
+}
+
+# Parse a \begin{NAME} ... \end{NAME} group. Called after the \begin token
+# has been read; $status is the status of the surrounding text.
+# The group name selects the parameter rule ($TeXmacro{'beginNAME'}) and
+# the status of the contents ($TeXgroup{NAME}); groups that are unknown or
+# have no name keep the surrounding status.
+sub parse_begin_end {
+  my ($tex,$status)=@_;
+  my $localstyle=$status>0 ? 'grouping' : 'exclgroup';
+  flush_style($tex,$localstyle);
+  gobble_option($tex);
+  my $groupname;
+  if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\*?\}[ \t\r\f]*//) {
+    # gobble group type
+    $groupname=$1;
+    print_style('{'.$groupname.'}',$localstyle);
+    my $next='begin'.$groupname;
+    if (defined (my $substat=$TeXmacro{$next})) {
+      gobble_macro_parms($tex,$substat);
+    }
+  } else {
+    print_error("Warning: BEGIN group without type.");
+  }
+  # find group status (or leave unchanged)
+  # Look the group up by the saved name rather than the capture variable,
+  # which is stale or undefined when the match above failed.
+  my $substat;
+  if (defined $groupname) {$substat=$TeXgroup{$groupname};}
+  if (!defined $substat) {$substat=$status;}
+  # An excluding outer status is never relaxed by the group.
+  if ($status<=0 && $status<$substat) {$substat=$status;}
+  if (($status>0) && ($substat==-1)) {
+    # Count float
+    inc_count($tex,5);
+  }
+  if ($status>0 and $substat>3) {
+    # count item, exclude contents
+    inc_count($tex,$substat);
+    $substat=0;
+  }
+  parse_unit($tex,$substat,'\end');
+  if ($tex->{'line'} =~ s/^\{\s*([^\{\}]+)\s*\}[ \t\r\f]*//) {
+    # gobble group type
+    flush_style($tex,$localstyle);
+    print_style('{'.$1.'}',$localstyle);
+  } else {
+    print_error("Warning: END group without type while waiting to end ".(defined $groupname ? $groupname : '').".");
+  }
+}
+
+# Handle the file name following an include macro, written as {file} or as
+# a bare word. $addsuffix selects how '.tex' is appended: 2 = always,
+# 1 = only if the name contains no dot, otherwise never.
+# Under a counted status the file is queued when -inc is active.
+sub parse_include_file {
+ my ($tex,$status,$addsuffix)=@_;
+ # Remove the file name from the input; give up if neither form matches.
+ $tex->{'line'} =~ s/^\{([^\{\}\s]+)\}// ||
+ $tex->{'line'} =~ s/^\s*([^\{\}\%\\\s]+)// ||
+ return;
+ flush_next($tex);
+ # $& (matched text) and $1 (file name) still refer to the match above.
+ if ($status>0) {
+ print_style($&,'fileinclude');
+ my $fname=$1;
+ if ($addsuffix==2) {$fname.='.tex';}
+ elsif ($addsuffix==1 && ($fname=~/^[^\.]+$/)) {$fname.='.tex';}
+ if ($includeTeX) {add_file_to_list($fname);}
+ } else {
+ # NOTE(review): the style tables define 'ignore', not 'ignored' - confirm
+ # this style name is intended.
+ print_style($&,'ignored');
+ }
+}
+
+# Remove one macro option (e.g. [...]) from the start of the input if one
+# of @MacroOptionPatterns matches there. Returns the removed text, or
+# undef when nothing matched.
+sub gobble_option {
+  my ($tex)=@_;
+  flush_next($tex);
+  for my $pattern (@MacroOptionPatterns) {
+    next unless ($tex->{'line'}=~s/^($pattern)//);
+    print_style($1,'option');
+    return $1;
+  }
+  return undef;
+}
+
+# Remove options from the start of the input until no more are found.
+sub gobble_options {
+  1 while (gobble_option(@_));
+}
+
+# Remove a '*' modifier directly following a macro name, if present.
+# Returns the removed '*', or undef when there was none.
+sub gobble_macro_modifier {
+  my $tex=shift @_;
+  flush_next($tex);
+  # The star must be captured: without a group, $1 is not the match.
+  if ($tex->{'line'} =~ s/^(\*)//) {
+    print_style($1,'option');
+    return $1;
+  }
+  return undef;
+}
+
+# Consume the parameters of a macro. $parm is either the number of
+# parameters to ignore or an array reference holding one status per
+# parameter; each status is combined with $oldstat through new_status.
+# Options before, between and after the parameters are removed as well.
+sub gobble_macro_parms {
+  my ($tex,$parm,$oldstat)=@_;
+  my $count;
+  if (ref($parm) eq 'ARRAY') {
+    $count=scalar @{$parm};
+  } else {
+    # A plain number means that many ignored (status 0) parameters.
+    $count=$parm;
+    $parm=[(0) x 9];
+  }
+  gobble_macro_modifier($tex) if ($count>0);
+  gobble_options($tex);
+  for (my $j=0;$j<$count;$j++) {
+    my $unitstatus=new_status($parm->[$j],$oldstat);
+    parse_unit($tex,$unitstatus);
+    gobble_options($tex);
+  }
+}
+
+# Combine the status requested for a unit ($substat) with the status of
+# its surroundings ($old). Without $old the requested status is used.
+# Otherwise the excluding statuses win in the order -3, -2, 0, -9; failing
+# those, the larger of the two values is returned.
+sub new_status {
+  my ($substat,$old)=@_;
+  return $substat unless (defined $old);
+  foreach my $level (-3,-2,0,-9) {
+    return $level if ($old==$level || $substat==$level);
+  }
+  return ($old>$substat) ? $old : $substat;
+}
+
+# Print the pending token, then return the next token that is neither a
+# comment (type 0) nor a line break (type 9). Comments are printed in the
+# 'comment' style; line breaks are reported through line_return when
+# verbose. Returns undef once get_next_token has no more tokens.
+sub next_token {
+  my ($tex)=@_;
+  if (defined $tex->{'next'}) {
+    print_style($tex->{'next'}.' ',$tex->{'style'});
+  }
+  $tex->{'style'}=undef;
+  my $token;
+  while (defined ($token=get_next_token($tex))) {
+    my $type=$tex->{'type'};
+    if ($type==0) {
+      print_style($token,'comment');
+      next;
+    }
+    if ($type==9) {
+      line_return(1,$tex) if ($verbose>0);
+      next;
+    }
+    return $token;
+  }
+  return $token;
+}
+
+
+# Remove and return the next token from the front of $tex->{'line'}.
+# The patterns below are tried in order and the first that matches wins;
+# get_token records the token in $tex->{'next'} and its category in
+# $tex->{'type'}.  When nothing matches, $tex->{'eof'} is set to 1 and
+# undef is returned.
+sub get_next_token {
+  # Token (or token group) category:
+  #   0: comment
+  #   1: word (or other forms of text or text components)
+  #   2: symbol (not word, e.g. punctuation)
+  #   3: macro
+  #   4: curly braces {}
+  #   5: brackets []
+  #   6: maths
+  #   9: line break in file
+  #   999: end of line or blank line
+  #   666: TeXcount instruction (%TC:instruction)
+  my $tex=shift @_;
+  my $next;
+  # An endignore instruction is returned in normalised form.
+  (defined ($next=get_token($tex,'\%+TC:\s*endignore\b[^\r\n]*',666))) && return "%TC:endignore";
+  (defined ($next=get_token($tex,'\%+TC:[^\r\n]*',666))) && return $next;
+  (defined ($next=get_token($tex,'\%[^\r\n]*',0))) && return $next;
+  # \r\n must be tried before \r: alternation picks the first branch that
+  # matches, so with \r first a CRLF line ending was split into two
+  # separate line break tokens.
+  (defined ($next=get_token($tex,'(\r\n|\r|\n)',9))) && return $next;
+  (defined ($next=get_token($tex,'\\\\[\{\}]',2))) && return $next;
+  foreach my $pattern (@WordPatterns) {
+    (defined ($next=get_token($tex,$pattern,1))) && return $next;
+  }
+  (defined ($next=get_token($tex,'[\"\'\`:\.,\(\)\[\]!\+\-\*=/\^\_\@\<\>\~\#\&]',2))) && return $next;
+  (defined ($next=get_token($tex,'\\\\([a-zA-Z_]+|[^a-zA-Z_])',3))) && return $next;
+  (defined ($next=get_token($tex,'[\{\}]',4))) && return $next;
+  (defined ($next=get_token($tex,'[\[\]]',5))) && return $next;
+  (defined ($next=get_token($tex,'\$\$',6))) && return $next;
+  (defined ($next=get_token($tex,'\$',6))) && return $next;
+  # Fallbacks: any remaining single character, then any run of
+  # non-whitespace characters.
+  (defined ($next=get_token($tex,'.',999))) && return $next;
+  (defined ($next=get_token($tex,'[^\s]+',999))) && return $next;
+  $tex->{'eof'}=1;
+  return undef;
+}
+
+# Try to remove a token matching $regexp from the front of $tex->{'line'}.
+# Blanks, tabs, carriage returns and form feeds following the token are
+# removed as well.  On success the token is stored in $tex->{'next'},
+# $type is stored in $tex->{'type'} and the token is returned; otherwise
+# undef is returned.  An undefined regex or line is reported through
+# print_error.
+sub get_token {
+  my ($tex,$regexp,$type)=@_;
+  print_error("ERROR in get_token: undefined regex.") unless (defined $regexp);
+  unless (defined $tex->{'line'}) {
+    print_error("ERROR in get_token: undefined tex-line. ".$tex->{'next'});
+  }
+  ($tex->{'line'} =~ s/^($regexp)[ \t\r\f]*//) or return undef;
+  my $token=$1;
+  $tex->{'next'}=$token;
+  $tex->{'type'}=$type;
+  return $token;
+}
+
+###### Count handling routines
+
+
+# Create a new count object: a hash reference holding the given title
+# and an array of eight counters, all zero.  Counter order:
+#   files, text words, header words, float words,
+#   headers, floats, math-inline, math-display
+sub new_count {
+  my ($title)=@_;
+  return {'count'=>[(0) x 8],'title'=>$title};
+}
+
+# Increase counter number $type of the current count object of $tex
+# ($tex->{'count'}) by $value; $value defaults to 1 when undefined.
+sub inc_count {
+  my ($tex,$type,$value)=@_;
+  $value=1 unless (defined $value);
+  $tex->{'count'}->{'count'}->[$type]+=$value;
+}
+
+# Return counter number $type of the count object $count.
+sub get_count {
+  my ($count,$type)=@_;
+  return $count->{'count'}->[$type];
+}
+
+# Return the weighted sum of the counters of $count: weight number i of
+# @sumweights is applied to counter i+1 (counter 0 is not included).
+sub total_count {
+  my ($count)=@_;
+  my $total=0;
+  # Walk from the last weight down to the first.
+  foreach my $idx (reverse 0..$#sumweights) {
+    $total+=get_count($count,$idx+1)*$sumweights[$idx];
+  }
+  return $total;
+}
+
+# Print a count object in the format selected by the global settings:
+# the detailed listing unless $briefsum is set; with $briefsum, only the
+# total when sum weights are given, otherwise the brief one-line form
+# (wrapped in a paragraph for HTML output).
+sub print_count {
+  my ($count,$header)=@_;
+  if (!$briefsum) {
+    print_count_details($count,$header);
+  } elsif (@sumweights) {
+    print_count_total($count,$header);
+  } else {
+    print "<p class='briefcount'>" if ($htmlstyle);
+    print_count_brief($count,$header);
+    print "</p>\n" if ($htmlstyle);
+  }
+}
+
+# Resolve the header to use for a count object.  Despite its name this
+# routine prints nothing: it returns the list ($count,$header) where an
+# undefined $header is replaced by the title of $count, or by the empty
+# string if that is undefined too.
+sub print_count_with_header {
+  my ($count,$header)=@_;
+  $header=$count->{'title'} unless (defined $header);
+  $header="" unless (defined $header);
+  return ($count,$header);
+}
+
+# Print only the total (weighted) count.  For HTML output the header
+# precedes the number inside a paragraph of class 'count'; otherwise the
+# number is printed first, followed by ': ' and the header.
+sub print_count_total {
+  my ($count,$header)=print_count_with_header(@_);
+  if ($htmlstyle) {
+    print "<p class='count'>".$header;
+    print total_count($count);
+    print "</p>\n";
+  } else {
+    print total_count($count);
+    print ": ".$header;
+  }
+}
+
+# Print the brief one-line form of a count object: counters 1-3 joined
+# by '+', counters 4-7 joined by '/' inside parentheses, and the header.
+sub print_count_brief {
+  my ($count,$header)=print_count_with_header(@_);
+  my $cnt=$count->{'count'};
+  print join("+",@{$cnt}[1..3])." (".join("/",@{$cnt}[4..7]).") ".$header;
+}
+
+# Print the detailed listing of a count object: the header, the file
+# count (only when it exceeds 1), the weighted sum (only when sum weights
+# are given), counters 1-7 with their labels and, if enabled and more
+# than one is present, the subcounts in brief form.  For HTML output the
+# listing is wrapped in a definition list of class 'count'.
+sub print_count_details {
+  my ($count,$header)=print_count_with_header(@_);
+  print "<dl class='count'>\n" if ($htmlstyle);
+  formatprint($header."\n",'dt','header') if (defined $header);
+  if (get_count($count,0)>1) {
+    formatprint($countlabel[0].': ','dt');
+    formatprint(get_count($count,0)."\n",'dd');
+  }
+  if (@sumweights) {
+    formatprint('Sum count: ','dt');
+    formatprint(total_count($count)."\n",'dd');
+  }
+  foreach my $idx (1..7) {
+    formatprint($countlabel[$idx].': ','dt');
+    formatprint(get_count($count,$idx)."\n",'dd');
+  }
+  my $parts=$count->{'subcounts'};
+  if ($showsubcounts && defined $parts && scalar(@{$parts})>1) {
+    formatprint("Subcounts: text+headers+captions (#headers/#floats/#inlines/#displayed)\n",'dt');
+    foreach my $part (@{$parts}) {
+      print "<dd class='briefcount'>" if ($htmlstyle);
+      print_count_brief($part);
+      print "</dd>" if ($htmlstyle);
+      print "\n";
+    }
+  }
+  print "</dl>\n" if ($htmlstyle);
+}
+
+# Add each of the eight counters of the second count object to the
+# corresponding counter of the first one (which is modified in place).
+sub add_count {
+  my ($target,$source)=@_;
+  foreach my $idx (0..7) {
+    $target->{'count'}->[$idx]+=$source->{'count'}->[$idx];
+  }
+}
+
+# Close the current count of $tex and start a new one titled $title.
+# The current count is added to $tex->{'countsum'} and appended to
+# $tex->{'countlist'}; $tex->{'count'} is then replaced by a fresh count
+# object.  Returns $tex->{'countsum'}.
+sub add_subcount {
+  my ($tex,$title)=@_;
+  my $finished=$tex->{'count'};
+  add_count($tex->{'countsum'},$finished);
+  push @{$tex->{'countlist'}},$finished;
+  $tex->{'count'}=new_count($title);
+  return $tex->{'countsum'};
+}
+
+###### Printing routines
+
+
+# Set the print style of the pending token to $style, unless the current
+# style is '-' (the value flush_next leaves behind), in which case it is
+# kept.
+sub set_style {
+  my ($tex,$style)=@_;
+  my $current=$tex->{'style'};
+  unless ($current && $current eq '-') {$tex->{'style'}=$style;}
+}
+
+# Set the style of the pending token (see set_style) and then print it
+# through flush_next.
+sub flush_style {
+  my $tex=shift @_;
+  my $style=shift @_;
+  set_style($tex,$style);
+  return flush_next($tex);
+}
+
+# Print a line break if $blank exceeds the global $blankline counter,
+# and then increment that counter.  When $tex is given and sum weights
+# are set, the running total count is printed first (style 'sumcount').
+sub line_return {
+  my ($blank,$tex)=@_;
+  return unless ($blank>$blankline);
+  if (@sumweights && defined $tex) {
+    print_style(" [".total_count($tex->{'count'})."]","sumcount");
+  }
+  linebreak();
+  $blankline++;
+}
+
+# Print a line break: '<br>' plus newline for HTML output, otherwise a
+# plain newline.
+sub linebreak {
+  print($htmlstyle ? "<br>\n" : "\n");
+}
+
+# Print $text in the given style, optionally followed by $state printed
+# in style 'state'.
+# Returns 1 if the text was printed.  Returns 0 if output is disabled
+# ($verbose<0), if $text or $style is undefined, or if the style maps to
+# '-' in %STYLE.  Returns an empty value if the style has no true entry
+# in %STYLE.  After printing, the global $blankline is reset to -1.
+sub print_style {
+  my ($text,$style,$state)=@_;
+  return 0 unless (($verbose>=0) && (defined $text) && (defined $style));
+  my $colour=$STYLE{$style};
+  return unless ($colour);
+  return 0 if ($colour eq '-');
+  print_with_style($text,$style,$colour);
+  print_style($state,'state') if ($state);
+  $blankline=-1;
+  return 1;
+}
+
+# Register an error: the global $errorcount is always incremented, and
+# unless output is disabled ($verbose<0) the message is printed in style
+# 'error', framed by '!!!' and surrounded by line returns.
+sub print_error {
+  my ($text)=@_;
+  $errorcount++;
+  return unless ($verbose>=0);
+  line_return(1);
+  print_style("!!! ".$text." !!!",'error');
+  line_return(1);
+}
+
+# Print $text, wrapped in the HTML element $tag (with optional class
+# $class) when HTML output is on and a tag is given.  A single trailing
+# newline of $text is kept outside the element.
+sub formatprint {
+  my ($text,$tag,$class)=@_;
+  my $hadbreak=($text=~s/\n$//);
+  if (!($htmlstyle && defined $tag)) {
+    print $text;
+  } else {
+    print '<'.$tag;
+    print " class='".$class."'" if ($class);
+    print '>'.$text.'</'.$tag.'>';
+  }
+  print "\n" if ($hadbreak);
+}
+
+# Print the pending token $tex->{'next'} (if defined) followed by a
+# space, using the recorded style and print state.  Afterwards the print
+# state is cleared and the style is set to '-'.
+sub flush_next {
+  my ($tex)=@_;
+  my $pending=$tex->{'next'};
+  print_style($pending.' ',$tex->{'style'},$tex->{'printstate'}) if (defined $pending);
+  $tex->{'printstate'}=undef;
+  $tex->{'style'}='-';
+}
+
+
+# Close the output: the HTML tail is added when $htmlstyle is greater
+# than 1.
+sub Close_Output {
+  html_tail() if ($htmlstyle>1);
+}
+
+
+# Report if there were any errors occurring during parsing.
+# Nothing is printed when $errorcount is zero.  With both $briefsum and
+# $totalflag set, a separating space is printed first.  The count is
+# printed as a paragraph of class 'error' for HTML output and as
+# '(errors:N)' otherwise.
+sub Report_ErrorCount {
+  if ($errorcount==0) {return;}
+  if ($briefsum && $totalflag) {print " ";}
+  if ($htmlstyle) {
+    # formatprint takes (text,tag,class).  print_error must not be used
+    # here: it takes the text only, would count the report itself as a
+    # further error and prints nothing when $verbose<0.
+    formatprint("Errors:".$errorcount,"p","error");
+  } else {
+    print "(errors:".$errorcount.")";
+  }
+}
+
+
+# Print an overview of the format/colour codes used in verbose output.
+# Nothing is printed unless $verbose is positive.  The entries for
+# 'state' and 'sumcount' are included only when $showstates is set and
+# sum weights are given, respectively.
+sub print_help_style {
+  return if ($verbose<=0);
+  formatprint("Format/colour codes of verbose output:","h2");
+  print "\n\n";
+  print "<p class='stylehelp'>" if ($htmlstyle);
+  # Each entry: sample text, style name, explanation.
+  my @codes=(
+    ['Text which is counted',"word1","counted as text words"],
+    ['Header and title text',"word2","counted as header words"],
+    ['Caption text and footnotes',"word3","counted as caption words"],
+    ["Ignored text or code","ignore","excluded or ignored"],
+    ['\documentclass',"document","document start, beginning of preamble"],
+    ['\macro',"command","macro not counted, but parameters may be"],
+    ['\macro',"exclcommand","macro in excluded region"],
+    ["[Macro options]","option","not counted"],
+    ['\begin{group} \end{group}',"grouping","begin/end group"],
+    ['\begin{group} \end{group}',"exclgroup","begin/end group in excluded region"],
+    ['$ $',"mathgroup","counted as one equation"],
+    ['$ $',"exclmath","equation in excluded region"],
+    ['% Comments',"comment","not counted"],
+    ['%TC:TeXcount instructions',"tc","not counted"],
+    ["File to include","fileinclude","not counted but file may be counted later"],
+  );
+  push @codes,['[state]',"state","internal TeXcount state"] if ($showstates);
+  push @codes,['[sumcount]',"sumcount","cumulative sum count"] if (@sumweights);
+  push @codes,["ERROR","error","TeXcount error message"];
+  foreach my $entry (@codes) {
+    help_style_line(@{$entry});
+  }
+  print "</p>" if ($htmlstyle);
+  print "\n\n";
+}
+
+# Print one line of the style overview: $text in style $style, followed
+# by a dotted separator, $comment and a line break.  The comment and the
+# line break are only printed if print_style reports that it printed the
+# text.
+sub help_style_line {
+  my ($text,$style,$comment)=@_;
+  my $lead=$htmlstyle ? "&nbsp;&nbsp;....&nbsp;&nbsp;" : " .... ";
+  $comment=$lead.$comment;
+  if (print_style($text,$style)) {
+    print $comment;
+    linebreak();
+  }
+}
+
+# Print the output style code overview if $showcodes is set.
+# Returns the value of $showcodes.
+sub conditional_print_help_style {
+  print_help_style() if ($showcodes);
+  return $showcodes;
+}
+
+###### HTML routines
+
+
+
+# Print the head of the HTML output: the opening html and head tags, a
+# UTF-8 content-type declaration when $utf8flag is set, the style sheet
+# for the output classes, and the start of the body with the page title.
+# The style sheet uses '#009' for dl.count and 'px' units for the body
+# padding and margin: a colour without '#' or a non-zero length without
+# a unit is not valid CSS.
+sub html_head {
+  print "<html>\n<head>";
+  if ($utf8flag) {
+    print "\n<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">";
+  }
+  print '
+<style>
+<!--
+body {width:auto;padding:5px;margin:5px;}
+.error {font-weight:bold;color:#f00;font-style:italic;}
+.word1,.word2,.word3 {color: #009;}
+.word2 {font-weight: 700;}
+.word3 {font-style: italic;}
+.command {color: #c00;}
+.exclcommand {color: #f99;}
+.option {color: #cc0;}
+.grouping, .document {color: #900; font-weight:bold;}
+.mathgroup {color: #090;}
+.exclmath {color: #6c6;}
+.ignore {color: #999;}
+.exclgroup {color:#c66;}
+.tc {color: #999; font-weight:bold;}
+.comment {color: #999; font-style: italic;}
+.state {color: #990; font-size: 70%;}
+.sumcount {color: #999; font-size: 80%;}
+.fileinclude {color: #696; font-weight:bold;}
+dl.count {background: #cfc; color: #009;}
+dl.count dt.header {font-weight: bold; font-style: italic; float: none;}
+dl.count dt {clear: both; float: left; margin-right: .5em;}
+dl.count dd {font-weight: bold;}
+dl.count dd.briefcount {font-weight: 700; clear: both; font-size:80%; font-weight:normal; margin-left:8pt;}
+.warning {color: #c00; font-weight: 700;}
+.parse, .count, .stylehelp, .filegroup {border: solid 1px #999; margin: 0pt; padding: 4pt;}
+.parse {font-size: 80%; background: #eef;}
+.parse {border-bottom:none;}
+.stylehelp {font-size: 80%; background: #ffc; margin-bottom: 8pt;}
+.filegroup {background: #efe; margin-bottom: 8pt;}
+-->
+</style>
+</head>
+<body>
+<h1>LaTeX word count</h1>
+';
+}
+
+# Print the closing body and html tags.
+sub html_tail {
+  my $tail='</body></html>';
+  print $tail;
+}
+
+###### Help routines
+
+
+
+# Print the version number and date, without a trailing newline.
+sub print_version {
+  print "TeXcount version $versionnumber, $versiondate.";
+}
+
+# Print the command line syntax and the list of options.
+# The text is a single literal and is printed exactly as written.
+sub print_syntax {
+ print '
+Syntax: TeXcount.pl [options] files
+
+Options:
+ -relaxed Uses relaxed rules for word and option handling:
+ i.e. allows more general cases to be counted as
+ either words or macros.
+ -v Verbose (same as -v3)
+ -v0 Do not present parsing details
+ -v1 Verbose: print parsed words, mark formulae
+ -v2 More verbose: also print ignored text
+ -v3 Even more verbose: include comments and options
+ -v4 Same as -v3 -showstate
+ -showstate Show internal states (with verbose)
+ -brief Only prints a brief, one line summary of counts
+ -q, -quiet Quiet mode, no error messages (use is discouraged!)
+ -sum, -sum= Make sum of all word and equation counts. May also
+ use -sum=#[,#] with up to 7 numbers to indicate how
+ each of the counts (text words, header words, caption
+ words, #headers, #floats, #inlined formulae,
+ #displayed formulae) are summed. The default sum (if
+ only -sum is used) is the same as -sum=1,1,1,0,0,1,1.
+ -sub, -sub= Generate subcounts. Option values are none, part,
+ chapter, section or subsection. Default (-sub) is set
+ to subsection, whereas unset is none. (Alternative
+ option name is -subcount.)
+ -nc, -nocol No colours (colours require ANSI)
+ -html Output in HTML format
+ -htmlcore Only HTML body contents
+ -inc Include tex files included in the document
+ -noinc Do not include included tex files (default)
+ -total Do not give sums per file, only total sum.
+ -1 Same as -brief and -total. Ensures there is only one
+ line of output. If used in conjunction with -sum, the
+ output will only be the total number. (NB: Character
+ is the number one, not the letter L.)
+ -dir, -dir= Specify the working directory using -dir=path.
+ Remember that the path must end with \ or /. If only
+ -dir is used, the directory of the parent file is used.
+ -utf8, -unicode Turns on Unicode (UTF-8) for input and output. This
+ is automatic with -chinese, and is required to handle
+ e.g. Korean text. Note that the TeX file must be save
+ in UTF-8 format (not e.g. GB2312 or Big5), or the
+ result will be unpredictable.
+ -ch, -chinese, -zhongwen Turns on support for Chinese characters.
+ TeXcount will then count each Chinese character as a
+ word. Automatically turns on -utf8.
+ -jp, -japanese Turns on support for Japanese characters. TeXcount
+ will count each Japanese character (kanji, hiragana,
+ and katakana) as one word, i.e. not do any form of
+ word segmentation. Automatically turns on -utf8.
+ -codes Display output style code overview and explanation.
+ This is on by default.
+ -nocodes Do not display output style code overview.
+ -h, -?, --help, /? Help
+ --version Print version number
+ --license License information
+';
+}
+
+# Print the full help text: a banner with version number and date, the
+# option syntax (print_syntax), a description of how counting works and
+# of the %TC parsing instructions, and finally the reference text
+# (print_reference).
+# The instruction is spelled '%TC:fileinclude' and the macro example
+# '\macro[...]' so that the help matches what a user must actually type.
+sub print_help {
+  print '
+***************************************************************
+* TeXcount.pl '.$versionnumber.', '.$versiondate.'
+*
+
+Count words in TeX and LaTeX files, ignoring macros, tables,
+formulae, etc.
+';
+  print_syntax();
+  print '
+The script counts words as either words in the text, words in
+headers/titles or words in floats (figure/table captions).
+Macro options (i.e. \macro[...]) are ignored; macro parameters
+(i.e. \macro{...}) are counted or ignored depending on the
+macro, but by default counted. Begin-end groups are by default
+ignored and treated as \'floats\', though some (e.g. center) are
+counted.
+
+Unless -nocol (or -nc) has been specified, the output will be
+colour coded. Counted text is coloured blue with headers are in
+bold and in HTML output caption text is italicised.
+
+Mathematical formulae are not counted as words, but are instead
+counted separately with separate counts for inlined formulae
+and displayed formulae. Similarly, the number of headers and
+the number of \'floats\' are counted. Note that \'float\' is used
+here to describe anything defined in a begin-end group unless
+explicitly recognized as text or mathematics.
+
+The verbose options (-v1, -v2, -v3, showstate) produces output
+indicating how the text has been interpreted. Check this to
+ensure that words in the text has been interpreted as such,
+whereas mathematical formulae and text/non-text in begin-end
+groups have been correctly interpreted.
+
+Parsing instructions may be passed to TeXcount using comments
+in the LaTeX files on the format
+ %TC:instruction arguments
+where valid instructions for setting parsing rules, typically
+set at the start of the document (applies globally), are:
+ %TC:macro [macro] [param.states]
+ macro handling rule, no. of and rules for parameters
+ %TC:macroword [macro] [number]
+ macro counted as a given number of words
+ %TC:header [macro] [param.states]
+ header macro rule, as macro but counts as one header
+ %TC:breakmacro [macro] [label]
+ macro causing subcount break point
+ %TC:group [name] [parsing-state]
+ begin-end-group handling rule
+ %TC:floatinclude [macro] [param.states]
+ as macro, but also counted inside floats
+ %TC:preambleinclude [macro] [param.states]
+ as macro, but also counted inside the preamble
+ %TC:fileinclude [macro] [rule]
+ file include, add .tex if rule=2, not if rule=0
+The [param.states] is used to indicate the number of parameters
+used by the macro and the rules of handling each of these: format
+is [#,#,...,#] with one number for each parameter, and main rules
+are 0 to ignore and 1 to count as text. Parsing instructions
+which may be used anywhere are:
+ %TC:ignore start block to ignore
+ %TC:endignore end block to ignore
+ %TC:break [title] add subcount break point here
+See the documentation for more details.
+
+Unix hint: Use \'less -r\' instead of just \'less\' to view output:
+the \'-r\' option makes less treat text formatting codes properly.
+
+Windows hint: If your Windows interprets ANSI colour codes, lucky
+you! Otherwise, use the -nocol (or -nc) option with the verbose
+options or the output will be riddled with colour codes. Instead,
+you can use -html to produce HTML code, write this to file and
+view with your favourite browser.
+';
+  print_reference();
+}
+
+# Print the copyright notice and pointers to further information
+# (web pages and contact address).
+# The text is a single literal and is printed exactly as written.
+sub print_reference {
+ print '
+The TeXcount script is copyright of Einar Andreas Rødland (2008)
+and published under the LaTeX Project Public License.
+
+For more information about the script, e.g. news, updates, help,
+usage tips, known issues and short-comings, go to
+ http://folk.uio.no/einarro/Comp/texwordcount.html
+or go to
+ http://folk.uio.no/einarro/Services/texcount.html
+to access the script as a web service. Feedback such as problems
+or errors can be reported to einarro@ifi.uio.no.
+';
+}
+
+# Print the license information, starting with the version number taken
+# from $versionnumber.
+# Apart from the version number the text is literal and is printed
+# exactly as written.
+sub print_license {
+ print 'TeXcount version '.$versionnumber.'
+
+Copyright 2008 Einar Andreas Rødland
+
+The TeXcount script is published under the LaTeX Project Public
+License (LPPL)
+ http://www.latex-project.org/lppl.txt
+which grants you, the user, the right to use, modify and distribute
+the script. However, if the script is modified, you must change its
+name or use other technical means to avoid confusion.
+
+The script has LPPL status "maintained" with Einar Andreas
+Rødland being the current maintainer.
+';
+}
+