From 02635c6770f599da3dfbbd49e6bd3f3c85eb20f0 Mon Sep 17 00:00:00 2001 From: David Runge Date: Sun, 2 Jul 2017 19:45:55 +0200 Subject: thesis/thesis.tex: Elaborating on Dynamic Binaural (Room) Synthesis and (Higher Order) Ambisonics Amplitude Panning and NFC-HOA. --- thesis/thesis.tex | 65 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/thesis/thesis.tex b/thesis/thesis.tex index a757ca9..a042e43 100644 --- a/thesis/thesis.tex +++ b/thesis/thesis.tex @@ -48,6 +48,7 @@ parskip=never]{paper} \newacronym{api}{API}{Application programming interface} \newacronym{asdf}{ASDF}{Audio Scene Description Format} \newacronym{bs}{BS}{Binaural Synthesis} +\newacronym{brir}{BRIR}{Binaural Room Impulse Response} \newacronym{brs}{BRS}{Binaural Room Synthesis} \newacronym{cc}{CC}{Creative Commons} \newacronym{cicm}{CICM}{Centre de recherche Informatique et Création Musicale} @@ -56,6 +57,8 @@ parskip=never]{paper} \newacronym{fdl}{FDL}{GNU Free Documentation License} \newacronym{gpl}{GPL}{GNU General Public License} \newacronym{gui}{GUI}{Graphical user interface} +\newacronym{hrir}{HRIR}{Head Related Impulse Response} +\newacronym{hrtf}{HRTF}{Head Related Transfer Function} \newacronym{ide}{IDE}{Integrated Development Environment} \newacronym{lgpl}{LGPL}{GNU Lesser General Public License} \newacronym{lts}{LTS}{Long Term Support} @@ -63,7 +66,7 @@ parskip=never]{paper} \newacronym{ip}{IP}{Internet Protocol} \newacronym{jack}{JACK}{JACK Audio Connection Kit} \newacronym{madi}{MADI}{Multichannel Audio Digital Interface} -\newacronym{nfc-hoa}{NFC-HOA}{Near-field-corrected Higher Order Ambisonics} +\newacronym{nfc-hoa}{NFC-HOA}{Near-field-compensated Higher Order Ambisonics} \newacronym{oop}{OOP}{Object-oriented Programming} \newacronym{os}{OS}{Operating System} \newacronym{osc}{OSC}{Open Sound Control} @@ -257,11 +260,59 @@ parskip=never]{paper} section about spatial audio renderer appliances and followed by one about 
free software and its pitfalls. - \subsection{Spatial audio renderers and their appliance} + \subsection{Spatial audio rendering algorithms and their appliances} \label{subsec:spatialaudiorenderersandtheirappliance} + In the following subsubsections several spatial audio rendering + algorithms are introduced briefly. + As they serve as a mere introduction, they are merged where applicable. - \subsubsection{Binaural (Room) Synthesis} + \subsubsection{Dynamic Binaural Synthesis and Dynamic Binaural Room + Synthesis} \label{subsubsec:binaural} + \gls{bs} describes a stereophonic audio reproduction, in which - + usually using headphones - acoustic signals are recreated at the ears + of the listener.\\ + For humans, sound source localization and distance estimation take + place according to auditory cues from each ear. The signals perceived + by inner and outer ear are correlated by the brain to account for + locations in all three dimensions and their distances from the + listener.\\ + The differences between the cues perceived by each ear can be measured + as a \gls{hrir} for every human individually (as it is dependent on + physiology). Its Fourier transform, the \gls{hrtf}, can then be used + to modify audio signals to become a directional audio source, perceived + as in free field conditions.\\ + \gls{brs} is a special form of \gls{bs}, in which \glspl{brir} encode + all of the virtual source's characteristics, such as position, + alongside the room's acoustic characteristics. + This way, recordings from real rooms can be reproduced authentically.\\ + \glspl{hrir} and \glspl{brir} are by default applied separately for + each ear, therefore, if a resolution of 1\textdegree~is desired, it + can only be achieved by a set of 720 impulse responses. 
+ + \subsubsection{(Higher Order) Ambisonics Amplitude Panning and + Near-field-compensated Higher Order Ambisonics} + \label{subsubsec:aapandnfchoa} + \gls{aap} and \gls{hoa} are spatial rendering algorithms that + reproduce audio on multi-speaker setups. Those are usually circular or + spherical.\\ + Depending on a loudspeaker's position in the setup, relative to the + sphere's center (the listening area or \textit{sweet spot}), a linear + combination of all loudspeakers is used to achieve a localized + representation of a virtual sound source.\\ + The relatively small listening area can be extended by using additional + sets of loudspeakers, which in turn lead to more spatial aliasing.\\ + Due to the perceptibility of localization cues, mentioned in + \ref{subsubsec:binaural}, it is required to apply spatial equalization + for the rendered sources, to account for differences in low- and + high-frequency localization capabilities of the human ear.\\ + For ambisonics, plane-wave sources are assumed. Due to the proximity + effect, this leads to a bass boost in the listening area. 
\gls{nfc-hoa} + accounts for this by a set of driving functions, applying a per speaker + near-field compensation.\\ + + \subsubsection{Vector Based Amplitude Panning} + \label{subsubsec:vbap} \subsubsection{Wave Field Synthesis} \label{subsubsec:wavefieldsynthesis} @@ -269,13 +320,11 @@ parskip=never]{paper} aims at synthesizing a sound field of desired acoustic preference in a given listening area, assuming a planar reproduction to be most suitable for most applications.\\ - \gls{wfs} is typically implemented using a curved or linear loudspeaker - array surrounding the listening area.\\ + \gls{wfs} is typically implemented using a circular, rectangular or + linear loudspeaker array surrounding or fronting the listening area.\\ Several free and open-source renderer applications exist for \gls{wfs} environments, with varying stages of feature richness.\\ - \subsubsection{Higher order ambisonics and vector based amplitude panning} - \label{subsubsec:hoaandvbap} \subsection{sWONDER} \label{subsec:swonder} @@ -318,7 +367,7 @@ parskip=never]{paper} {https://puredata.info/downloads/mrpeach}} or \textit{IEMnet}\footnote{ \href{https://puredata.info/downloads/iemnet} {https://puredata.info/downloads/iemnet}}.\\ - + \subsection{3Dj (SuperCollider Quark)} \label{subsec:3dj_supercollider_quark} 3Dj is a \gls{supercollider} \gls{quark} conceived in the course of a Master -- cgit v1.2.3-54-g00ecf