summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--thesis/thesis.tex65
1 files changed, 57 insertions, 8 deletions
diff --git a/thesis/thesis.tex b/thesis/thesis.tex
index a757ca9..a042e43 100644
--- a/thesis/thesis.tex
+++ b/thesis/thesis.tex
@@ -48,6 +48,7 @@ parskip=never]{paper}
\newacronym{api}{API}{Application programming interface}
\newacronym{asdf}{ASDF}{Audio Scene Description Format}
\newacronym{bs}{BS}{Binaural Synthesis}
+\newacronym{brir}{BRIR}{Binaural Room Impulse Response}
\newacronym{brs}{BRS}{Binaural Room Synthesis}
\newacronym{cc}{CC}{Creative Commons}
\newacronym{cicm}{CICM}{Centre de recherche Informatique et Création Musicale}
@@ -56,6 +57,8 @@ parskip=never]{paper}
\newacronym{fdl}{FDL}{GNU Free Documentation License}
\newacronym{gpl}{GPL}{GNU General Public License}
\newacronym{gui}{GUI}{Graphical user interface}
+\newacronym{hrir}{HRIR}{Head Related Impulse Response}
+\newacronym{hrtf}{HRTF}{Head Related Transfer Function}
\newacronym{ide}{IDE}{Integrated Development Environment}
\newacronym{lgpl}{LGPL}{GNU Lesser General Public License}
\newacronym{lts}{LTS}{Long Term Support}
@@ -63,7 +66,7 @@ parskip=never]{paper}
\newacronym{ip}{IP}{Internet Protocol}
\newacronym{jack}{JACK}{JACK Audio Connection Kit}
\newacronym{madi}{MADI}{Multichannel Audio Digital Interface}
-\newacronym{nfc-hoa}{NFC-HOA}{Near-field-corrected Higher Order Ambisonics}
+\newacronym{nfc-hoa}{NFC-HOA}{Near-field-compensated Higher Order Ambisonics}
\newacronym{oop}{OOP}{Object-oriented Programming}
\newacronym{os}{OS}{Operating System}
\newacronym{osc}{OSC}{Open Sound Control}
@@ -257,11 +260,59 @@ parskip=never]{paper}
section about spatial audio renderer appliances and followed by one about
free software and its pitfalls.
- \subsection{Spatial audio renderers and their appliance}
+ \subsection{Spatial audio rendering algorithms and their appliances}
\label{subsec:spatialaudiorenderersandtheirappliance}
+ In the following subsubsections several spatial audio rendering
+ algorithms are introduced briefly.
+ As they serve as a mere introduction, they are merged where applicable.
- \subsubsection{Binaural (Room) Synthesis}
+ \subsubsection{Dynamic Binaural Synthesis and Dynamic Binaural Room
+ Synthesis}
\label{subsubsec:binaural}
+ \gls{bs} describes a stereophonic audio reproduction, in
+ which---usually using headphones---acoustic signals are recreated at the
+ ears of the listener.\\
+ For humans, sound source localization and distance estimation take
+ place according to auditory cues from each ear. The signals perceived
+ by inner and outer ear are correlated by the brain to account for
+ locations in all three dimensions and their distances from the
+ listener.\\
+ The differences between the cues perceived by each ear can be measured
+ as a \gls{hrir} for every human individually (as it is dependent on
+ physiology). Its Fourier transform, the \gls{hrtf}, can then be used
+ to modify audio signals to become a directional audio source, perceived
+ as in free field conditions.\\
+ \gls{brs} is a special form of \gls{bs}, in which \glspl{brir} encode
+ all of the virtual source's characteristics, such as position,
+ alongside the room's acoustic characteristics.
+ This way, recordings from real rooms can be reproduced authentically.\\
+ \glspl{hrir} and \glspl{brir} are by default applied separately for
+ each ear; therefore, if a resolution of 1\textdegree~is desired, it
+ can only be achieved by a set of 720 impulse responses.
+
+ \subsubsection{(Higher Order) Ambisonics Amplitude Panning and
+ Near-field-compensated Higher Order Ambisonics}
+ \label{subsubsec:aapandnfchoa}
+ \gls{aap} and \gls{hoa} are spatial rendering algorithms that
+ reproduce audio on multi-speaker setups. Those are usually circular or
+ spherical.\\
+ Depending on a loudspeaker's position in the setup, relative to the
+ sphere's center (the listening area or \textit{sweet spot}), a linear
+ combination of all loudspeakers is used to achieve a localized
+ representation of a virtual sound source.\\
+ The relatively small listening area can be extended by using additional
+ sets of loudspeakers, which in turn lead to more spatial aliasing.\\
+ Due to the perceptibility of localization cues, mentioned in
+ \ref{subsubsec:binaural}, it is required to apply spatial equalization
+ for the rendered sources, to account for differences in low- and
+ high-frequency localization capabilities of the human ear.\\
+ For ambisonics, plane-wave sources are assumed. Due to the proximity
+ effect, this leads to a bass boost in the listening area. \gls{nfc-hoa}
+ accounts for this by a set of driving functions, applying a per speaker
+ near-field compensation.\\
+
+ \subsubsection{Vector Based Amplitude Panning}
+ \label{subsubsec:vbap}
\subsubsection{Wave Field Synthesis}
\label{subsubsec:wavefieldsynthesis}
@@ -269,13 +320,11 @@ parskip=never]{paper}
aims at synthesizing a sound field of desired acoustic preference in a
given listening area, assuming a planar reproduction to be most
suitable for most applications.\\
- \gls{wfs} is typically implemented using a curved or linear loudspeaker
- array surrounding the listening area.\\
+ \gls{wfs} is typically implemented using a circular, rectangular or
+ linear loudspeaker array surrounding or fronting the listening area.\\
Several free and open-source renderer applications exist for \gls{wfs}
environments, with varying stages of feature richness.\\
- \subsubsection{Higher order ambisonics and vector based amplitude panning}
- \label{subsubsec:hoaandvbap}
\subsection{sWONDER}
\label{subsec:swonder}
@@ -318,7 +367,7 @@ parskip=never]{paper}
{https://puredata.info/downloads/mrpeach}} or \textit{IEMnet}\footnote{
\href{https://puredata.info/downloads/iemnet}
{https://puredata.info/downloads/iemnet}}.\\
-
+
\subsection{3Dj (SuperCollider Quark)}
\label{subsec:3dj_supercollider_quark}
3Dj is a \gls{supercollider} \gls{quark} conceived in the course of a Master