\documentclass[12pt, a4paper, oneside, titlepage, listof=totoc, headinclude, open=right, glossaries=totoc, captions=tableheading, final, BCOR=10mm, parskip=never]{paper} \usepackage[ngerman,english]{babel} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{textcomp} \usepackage[usenames,dvipsnames,rgb,svgnames,table]{xcolor} \definecolor{osc-out}{RGB}{150,0,255} \definecolor{osc-in}{RGB}{0,0,255} \definecolor{audio-in}{RGB}{255,0,0} \definecolor{audio-out}{RGB}{0,206,0} \definecolor{pubsub-in}{RGB}{128,0,0} \definecolor{controller-in}{RGB}{0,128,0} \definecolor{table-background-one}{RGB}{194,195,194} \definecolor{table-background-two}{RGB}{210,213,210} \usepackage[newfloat=true]{minted} \usepackage{mdframed} \usepackage{fancyhdr} \usepackage{url} \usepackage{graphicx} \usepackage{pdfpages} \usepackage[author={David Runge}]{pdfcomment} \usepackage{mathtools} \usepackage{float} \floatstyle{boxed} \restylefloat{figure} \usepackage{tocloft} \setcounter{secnumdepth}{4} \usepackage[toc,page]{appendix} \usepackage[nottoc,numbib]{tocbibind} \usepackage{hyperref} \hypersetup{hidelinks, colorlinks = false, pdfauthor={David Runge}, pdftitle={Master's Thesis: A Networking Extension for the SoundScape Renderer}, pdfsubject={A Networking Extension for the SoundScape Renderer}, pdfkeywords={Ambisonics, Binaural Synthesis, C++, Client-Server-Architecture, Linux, macOS, Networking, OSC, liblo, SoundScape Renderer, Spatial Audio, SuperCollider, Technische Universität Berlin, VBAP, WFS} } \usepackage[authoryear,round]{natbib} % caption \usepackage[font=scriptsize]{caption} % glossary \usepackage[acronym,nonumberlist,toc,xindy]{glossaries} \makeglossaries \newacronym{aap}{AAP}{Ambisonics Amplitude Panning} \newacronym{adat}{ADAT}{Alesis Digital Audio Tape} \newacronym{alsa}{ALSA}{Advanced Linux Sound Architecture} \newacronym{apf}{APF}{Audio Processing Framework} \newacronym{api}{API}{Application Programming Interface} \newacronym{asdf}{ASDF}{Audio Scene Description 
Format} \newacronym{bs}{BS}{Binaural Synthesis} \newacronym{brir}{BRIR}{Binaural Room Impulse Response} \newacronym{brs}{BRS}{Binaural Room Synthesis} \newacronym{cc}{CC}{Creative Commons} \newacronym{cicm}{CICM}{Centre de recherche Informatique et Création Musicale} \newacronym{cnmat}{CNMAT}{Center for New Music and Audio Technologies} \newacronym{cpu}{CPU}{Central Processing Unit} \newacronym{fdl}{FDL}{GNU Free Documentation License} \newacronym{gpl}{GPL}{GNU General Public License} \newacronym{gui}{GUI}{Graphical User Interface} \newacronym{hrir}{HRIR}{Head Related Impulse Response} \newacronym{hrtf}{HRTF}{Head Related Transfer Function} \newacronym{iana}{IANA}{Internet Assigned Numbers Authority} \newacronym{ide}{IDE}{Integrated Development Environment} \newacronym{lgpl}{LGPL}{GNU Lesser General Public License} \newacronym{lts}{LTS}{Long Term Support} \newacronym{hoa}{HOA}{Higher Order Ambisonics} \newacronym{ip}{IP}{Internet Protocol} \newacronym{jack}{JACK}{JACK Audio Connection Kit} \newacronym{madi}{MADI}{Multichannel Audio Digital Interface} \newacronym{midi}{MIDI}{Musical Instrument Digital Interface} \newacronym{nfc-hoa}{NFC-HOA}{Near-Field-Compensated Higher Order Ambisonics} \newacronym{oop}{OOP}{Object-Oriented Programming} \newacronym{os}{OS}{Operating System} \newacronym{osc}{OSC}{Open Sound Control} \newacronym{posix}{POSIX}{Portable Operating System Interface} \newacronym{pubsub}{PubSub}{Publish-Subscribe message pattern} \newacronym{pd}{Pd}{PureData} \newacronym{raii}{RAII}{Resource Acquisition Is Initialization} \newacronym{ssr}{SSR}{SoundScape Renderer} \newacronym{tcp}{TCP}{Transmission Control Protocol} \newacronym{tu-berlin}{TU Berlin}{Technische Universität Berlin} \newacronym{udp}{UDP}{User Datagram Protocol} \newacronym{vbap}{VBAP}{Vector Based Amplitude Panning} \newacronym{wfs}{WFS}{Wave Field Synthesis} \newacronym{xml}{XML}{Extensible Markup Language} \newglossaryentry{ascii}{ name={ASCII}, description={American Standard Code for 
Information Interchange --- a character encoding standard} } \newglossaryentry{sclang}{ name={sclang}, description={Name of the SuperCollider programming language and the interpreter executable of the SuperCollider programming language} } \newglossaryentry{id}{ name={ID}, description={A name or number, that identifies an object}, plural=IDs } \newglossaryentry{stdout}{ name={stdout}, description={The standard output is a stream where a program writes its output data to. This can be a log file or a terminal} } \newglossaryentry{faust}{ name={FAUST}, description={Functional Audio Stream is a functional programming language specifically designed for realtime signal processing and synthesis} } \newglossaryentry{quark}{ name={Quark}, description={Name for Classes extending the SuperCollider programming language, usually developed in a separate version controlled code repository}, plural=Quarks } \newglossaryentry{supercollider}{ name={SuperCollider}, description={A programming language, \gls{ide} and synthesis server for realtime audio processing and synthesis} } \newglossaryentry{python}{ name={Python}, description={A multi-purpose, object-oriented programming language} } \newglossaryentry{qt4}{ name={Qt4}, description={Version 4 (legacy) of the cross-platform application framework for creating desktop applications} } \newglossaryentry{qt5}{ name={Qt5}, description={Version 5 of the cross-platform application framework for creating desktop applications} } \graphicspath{{../images//}} \begin{document} \pagestyle{empty} \begin{titlepage} \centering {\huge\bfseries A Networking Extension for the SoundScape Renderer\par} \vspace{1cm} {\scshape\Large Master's Thesis\par} \vspace{1.5cm} \includegraphics[width=0.3\textwidth]{tu-berlin-logo.pdf}\par\vspace{1cm} Technische Universität Berlin\\ Fakultät I - Geisteswissenschaften\\ Fachgebiet Audiokommunikation\\ Audiokommunikation und -technologie M.Sc.\\ \vspace{1.5cm} \begin{table}[!htb] \begin{tabular}{l l} 
\textbf{Vorgelegt von}: &David Runge\\ \textbf{Matrikelnummer}: &340592\\ \textbf{E-Mail}: &\href{mailto:dave@sleepmap.de}{dave@sleepmap.de}\\ \end{tabular} \end{table} \begin{table}[!htb] \begin{tabular}{l l} \textbf{Erstgutachter}: &Prof. Dr. Stefan Weinzierl\\ \textbf{Zweitgutachter}: &Henrik von Coler\\ \textbf{Datum}: &\today\\ \end{tabular} \end{table} \end{titlepage} \nextpage \mbox{} \cleardoublepage \nextpage \section*{Eidesstattliche Erklärung} \vspace{1cm} Hiermit erkläre ich, dass ich die vorliegende Arbeit selbstständig und eigenhändig sowie ohne unerlaubte fremde Hilfe und ausschließlich unter Verwendung der aufgeführten Quellen und Hilfsmittel angefertigt habe.\\ Berlin, den \today\par \vspace{2cm} \noindent\ldots\ldots\ldots\ldots\ldots\ldots\ldots\ldots\ldots\ldots\ldots\\ David Runge \newpage \begin{abstract} Different types of software have been designed in the field of spatial audio reproduction to cope with the needs of several rendering algorithms, conceived over the last century. Their feasibility and reproducibility in a real environment have been evaluated and tested and their implementations extended, or improved accordingly.\\ Today, spatial audio rendering software is often a single-purpose application, that attends to the needs of a specific setup in place. Unfortunately, to some of these solutions, no further work is applied, leaving them unsupported at some point in time. The affected hardware setups range from those for large scale Wave Field Synthesis to smaller ones, found in scientific research, involving Dynamic Binaural Synthesis. All of them are in need of extensively supported and reliable software, that can even be used, once the operating system is changing. 
This can happen, if the hardware has to be upgraded due to old age or if new software is required, that cannot be built on older operating systems.\\ In the following work, a set of current free and open-source realtime spatial audio renderers, actively used in scientific and artistic contexts, is evaluated for usability, applicability and realtime context: sWONDER, HoaLibrary (for PureData), 3Dj (for SuperCollider), WFSCollider and the SoundScape Renderer. The latter --- in contrast to the other candidates, a multi-purpose renderer --- is chosen for a rewrite of its network based messaging system, as the application implements many different rendering algorithms, while still being actively maintained.\\ Its new functionality allows it to be used in other environments: While rendering the same virtual audio scene, large scale multi-loudspeaker setups, in which several instances work collectively, are possible as well as networked individual setups.\\ The SoundScape Renderer's new networking extension, along with a client-server architecture, its messaging system with tests and workflow examples is elaborated. New setup possibilities are contrasted with the automation available through the network interface currently in use.\\ In closing, an outlook on future work with the help of the new networking extension and general improvement suggestions for the SoundScape Renderer are discussed. \end{abstract} \newpage \begin{otherlanguage}{ngerman} \begin{abstract} Unterschiedlichste Software wurde im Feld der Raumklangsteuerung geschrieben, um die vielzähligen Algorithmen des letzten Jahrhunderts abbilden zu können. Die Praktikabilität und Reproduzierbarkeit dieser Algorithmen in einer realen Umgebung wurde evaluiert und getestet und ihre Implementationen entsprechend erweitert, oder verbessert.\\ Heutige Raumklangsteuerungssoftware ist häufig eine Einzweck-Software, welche einem spezifischen Aufbau dient. 
Unglücklicherweise werden einige dieser Lösungen nicht weiterentwickelt, was zukünftig zwangsläufig zu ihrer Unbrauchbarkeit führt. Die betroffenen Hardwareaufbauten reichen von großen Anlagen, wie jenen für die Umsetzung einer Wellenfeldsynthese, bis zu kleineren, in der wissenschaftlichen Forschung auffindbaren, die Dynamische Binauralsynthese anwenden. All diese Hardwareumgebungen benötigen Software, die weitläufig unterstützt ist und verlässlich arbeitet, auch wenn die Betriebssysteme, auf denen sie genutzt werden, sich verändern. Diese Umstände können eintreten, wenn veraltete Hardware ausgewechselt werden muss, oder neuere Software benötigt wird, die nicht auf älteren Betriebssystemen einsetzbar ist.\\ In der nachfolgenden Arbeit wird die folgende Sammlung aus freien und quelloffenen Anwendungen zur Echtzeit-Raumklangsteuerung, die derzeit aktiv in künstlerischen und wissenschaftlichen Umgebungen Anwendung finden, anhand ihrer Nutzungsmöglichkeiten, ihrer Anwendbarkeit und ihrer Umgebung evaluiert: sWONDER, HoaLibrary (für PureData), 3Dj (für SuperCollider), WFSCollider und der SoundScape Renderer. Der letztere --- im Gegensatz zu den anderen Kandidaten ein Mehrzweck-Werkzeug --- wurde für eine Neuausarbeitung seiner Netzwerkfähigkeit gewählt, aufgrund seiner vielzähligen Implementationen von Raumklang-Algorithmen und seiner noch betriebenen Weiterentwicklung.\\ Seine neue Funktionalität erlaubt es ihm, in anderen Umgebungen eingesetzt zu werden: Während die gleiche virtuelle Audio-Szene verräumlicht wird, ist die Anwendung nun gleichermaßen einsetzbar in einer Großanlage, in der viele Instanzen zusammen arbeiten, und in vernetzten, individuellen Aufbauten, die getrennt voneinander arbeiten.\\ Die neuen Netzwerkmöglichkeiten der Anwendung, zusammen mit einer Client-Server-Architektur sowie ihrem Nachrichtensystem, werden anhand von Arbeitsabläufen und Tests ausgearbeitet. 
Neue Möglichkeiten der Vernetzung werden der derzeitig verwendeten Netzwerkschnittstelle gegenübergestellt.\\ Abschließend werden ein Ausblick auf weitere Arbeit anhand der neuen Netzwerkschnittstelle und generelle Verbesserungsvorschläge für den SoundScape Renderer diskutiert. \end{abstract} \end{otherlanguage} \newpage \section*{Acknowledgements} I would like to thank my supervisors Prof.~Dr.~Stefan Weinzierl and Henrik von Coler for their tremendous efforts in making this work happen.\\ Special thanks to Henrik for pushing at the right moments.\\ \\ I would also like to thank Marc Voigt for bouncing off ideas every once in a while and helping in realizing a test setup. Hopefully this work will be useful for the setups you have to maintain in the future.\\ \\ Thanks to Matthias Geier for being relentless and yet supportive.\\ \\ Special thanks to my family for their loving support over all of this time spent.\\ Thank you Nanni, for taking my mind off.\\ \\ Thanks to Sabine and Peter for reading through all of this nonsense.\\ \\ To all whom I forgot: You know who you are. I could not have done it without you. \newpage \pagestyle{empty} { \setcounter{tocdepth}{4} \renewcommand{\thispagestyle}[1]{} \tableofcontents \newpage } \pagestyle{headings} \setcounter{page}{1} \pagenumbering{arabic} \renewcommand{\theFancyVerbLine} {\textcolor{gray}{\scriptsize\arabic{FancyVerbLine}}} \RecustomVerbatimEnvironment{Verbatim}{Verbatim}{xleftmargin=5mm} \section{Introduction} \label{sec:introduction} From the early days of stereo audio reproduction onwards, different kinds of spatial audio reproduction techniques have been developed and established, ranging from plain stereophony to three-dimensional, multi-channel setups. 
Their applications range from research to artistic and conventionally commercial fields, such as cinema and home entertainment.\\ With the rise of dynamic two- and three-dimensional rendering algorithms (see~\ref{subsec:spatialaudiorenderingalgorithms}), the need for specialized software, implementing them, grew. As opposed to encoding of spatial information of sources in only two channels (static in the case of commercially produced audio for radio and film), encoding for massive multi-loudspeaker systems would not be feasible, when done statically, or not applicable in the case of dynamic setups, reacting to user input in realtime.\\ Early dedicated hardware implementations, such as the \textit{Halaphon}, designed by Hans Peter Haller and Peter Lawo \citep[p.~78f]{book:haller1995}, started out as basic spatial dispersion systems for quadrophonic loudspeaker setups, based on amplitude pannings using envelopes. Due to huge interest from artists in this new technique, these systems were soon expanded to cope with eight and more channels.\\ A notable piece, making use of a later revision of the \textit{Halaphon}, is Luigi Nono's \textit{Prometeo}. For it, the composer developed the \textit{coro lontanissimo}: A choir singing at a great distance. To achieve the effect --- in a usually very dampened orchestra house --- the spatialization system was used to add between eight and 15 seconds of reverberation time in the prolog and up to 20 seconds in the second part of the piece. This enabled a sung \textit{fivefold pianissimo} and a \textit{triple pianissimo} (respectively) to be perceived as coming from a larger distance than the room's dimension \citep[p. 
91f]{book:haller1995}.\\ This early example of a spatial audio renderer already illustrates the close vicinity of applied scientific research in experimental electronic music studios and that of artistic work, facilitating live electronics.\\ With the fast technological development of computer systems, the dedicated solutions shifted more into the digital domain and finally towards software solutions. This effectively allowed a stronger focus on specializing and refining the algorithms in use.\\ Spatial audio rendering software exists for different \glspl{os}, in several stages of completeness and feature richness, while covered by free (see~\ref{subsec:whyfreesoftwarematters}) and non-free licenses. The following work focusses on free software, used in scientific research and artistic contexts. Several spatial audio renderers, currently in use, were evaluated and compared (see~\ref{sec:freespatialaudiorenderers}), of which one was chosen for extension.\\ Some spatial audio renderers are single-purpose applications, conceived for a specific (and often quite rare) loudspeaker setup, such as those used for \gls{wfs} or \gls{hoa}. An example of this is the large scale system at \gls{tu-berlin} \citep{website:tu-wfs} or HAW Hamburg. The \gls{ssr} is a multi-purpose spatial audio renderer, developed at the \gls{tu-berlin}. To improve its usability and networking capabilities, a new networking extension was developed, facilitating an \gls{osc} based messaging system, that incorporates features for distributed processing in massive multi-loudspeaker setups.\\ \cleardoublepage \section{Free and Open-source Spatial Audio Renderers} \label{sec:freespatialaudiorenderers} \gls{jack} \citep{website:jackaudio2016} is a low-latency audio server, that allows for software using its environment to connect their in- and outputs with any other application using it. It is licensed under the \gls{gpl} and can be built for various \glspl{os} (e.g. Linux, macOS, Windows). 
As of today, a plethora of applications exist, that extend \gls{jack}'s functionality graphically, or make use of it musically and productively. Due to the large set of audio drivers it can use (i.e. \gls{alsa}, coreaudio, freebob, oss sun and portaudio) and its general availability, the audio server has become the de-facto standard for free and open-source, production ready applications on all major \glspl{os}.\\ To date there exist five (known of) free and open-source spatial audio renderers, which are all \gls{jack} clients: \begin{itemize} \item sWONDER \citep{phdthesis:baalman2007}, developed at the \gls{tu-berlin}, Germany \item WFSCollider \citep{website:wfscollider}, developed by the Game Of Life Foundation \citep{website:gameoflife}, The Hague, Netherlands \item HoaLibrary for \gls{pd} \citep{github:hoalibraryforpd} developed at the \gls{cicm}, Paris, France \item 3Dj for \gls{supercollider} \citep{mastersthesis:perezlopez2014}, developed at the Universitat Pompeu Fabra, Barcelona \item \gls{ssr} \citep{website:ssr2016}, developed at the Quality \& Usability Lab, Telekom Innovation Laboratories, \gls{tu-berlin} and Institut für Nachrichtentechnik, Universität Rostock and Division of Applied Acoustics, Chalmers University of Technology \end{itemize} Different concepts and contexts apply to all of the renderers, which are briefly explained in the following sections, prefixed by a section about spatial audio rendering algorithms and followed by one about free software and its pitfalls. \subsection{Spatial Audio Rendering Algorithms} \label{subsec:spatialaudiorenderingalgorithms} In the following subsections several spatial audio rendering algorithms are introduced briefly. As they serve as a mere introduction, they were merged where applicable. 
\subsubsection{Dynamic Binaural Synthesis and Dynamic Binaural Room Synthesis} \label{subsubsec:binaural} \gls{bs} describes a stereophonic audio reproduction, in which --- usually using headphones --- acoustic signals are recreated at the ears of the listener.\\ For humans, sound source localization and distance estimation take place according to auditory cues from each ear. The signals perceived by inner and outer ear are correlated by the brain, to account for locations in all three dimensions and their distances from the listener.\\ The differences between the cues perceived by each ear can be measured as a \gls{hrir} for every human individually (as it is dependent on physiology). Its Fourier transform, the \gls{hrtf}, can then be used to modify audio signals to become a directional audio source, perceived as in free field conditions.\\ \gls{brs} is a special form of \gls{bs}, in which \glspl{brir} encode all of the virtual source's characteristics, such as position, alongside the room's acoustic characteristics. This way, recordings from real rooms can be reproduced authentically.\\ \glspl{hrir} and \glspl{brir} are by default applied separately for each ear. Therefore, if a resolution of 1\textdegree~is desired, it can be achieved by a set of 720 impulse responses, that are applied to the source with the help of a head tracker, measuring the azimuth of the listener towards it. \subsubsection{(Higher Order) Ambisonics Amplitude Panning and Near-Field-Compensated Higher Order Ambisonics} \label{subsubsec:aapandnfchoa} \gls{aap} and \gls{hoa} are spatial rendering algorithms, that reproduce audio on multi-speaker setups. Those are usually circular or spherical.\\ Depending on a loudspeaker's position in the setup, relative to the sphere's center (the listening area or \textit{sweet spot} \citep[Fig. 
1.4]{phdthesis:wierstorf2014}), a linear combination of all loudspeakers is used to achieve a localized representation of a virtual sound source.\\ The relatively small listening area can be extended by using additional sets of loudspeakers, which in turn lead to more spatial aliasing.\\ Due to the perceptibility of localization cues, mentioned in~\ref{subsubsec:binaural}, it is required to apply spatial equalization for the rendered sources, to account for differences in low- and high-frequency localization capabilities of the human ear.\\ For ambisonics, plane-wave sources are assumed, which means their distance is infinite. Due to the proximity effect, this leads to a bass boost in the listening area. \gls{nfc-hoa} accounts for this by a set of driving functions, applying a per-speaker near-field compensation.\\ \subsubsection{Vector Based Amplitude Panning} \label{subsubsec:vbap} \gls{vbap} is another rendering method for multiple loudspeakers. Up to three loudspeakers are used to reproduce a virtual sound source in a three-dimensional setup, while only two are needed in a horizontal one.\\ It allows for “virtual source positioning in a three-dimensional sound field formed by loudspeakers in an arbitrary three-dimensional placement”, while being “computationally efficient and accurate” \citep[p.~464]{inproceedings:pulkki1997}.\\ However, according to \citet{inproceedings:geierandspors2012}, “\gls{vbap} has a very small sweet spot, out of which localization of sources is distorted towards the nearest active loudspeaker” and “works best for circular setups”. \subsubsection{Wave Field Synthesis} \label{subsubsec:wavefieldsynthesis} \gls{wfs} is a spatial audio rendering technique, which is based on the Huygens--Fresnel principle. 
It states that any wave front can be synthesized by the superposition of elementary spherical waves.\\ Setups mainly focus on horizontal, preferably spatially discrete, speaker arrays of rectangular or circular shape as the human hearing is most capable of localizing acoustic sources in this plane.\\ According to \citet{inproceedings:wierstorfetal2012}, localization is accurately and evenly distributed in the listening area with loudspeaker spacings of up to 40cm.\\ Although \gls{wfs} does not suffer from a pronounced sweet spot, and spatial aliasing is distributed over a relatively large listening area, compared to e.g.\ \gls{nfc-hoa}, the spatial sampling artifacts may still be perceived as coloration of the sound field, which can be improved by prefiltering especially high-frequency content \citep{phdthesis:wittek2007}.\\ Due to the relatively high amount of loudspeakers (and thereby computing power to calculate as many audio channels) needed for a medium to large-scale setup, \gls{wfs} is not yet very widely distributed. \subsection{sWONDER} \label{subsec:swonder} sWONDER \citep{phdthesis:baalman2007} consists of a set of C++ applications that provide \gls{bs} and \gls{wfs} rendering. In 2007 it was specifically redesigned \citep{inproceedings:baalmanetal2007} to cope with large scale \gls{wfs} setups in which several (computer) nodes, providing several speakers each, drive a system together.\\ In these setups each node receives all available audio streams (which represent one virtual audio source respectively) redundantly and a master application signals which node is responsible for rendering what source on which speaker.\\ It uses \gls{osc} for messaging between its components and for setting its controls. 
Additionally, it can be controlled through a \gls{gui}, that was specifically designed for it.\\ Sound sources can be moved dynamically, or according to an \gls{xml} based score.\\ For example sWONDER has been in use for the medium and large scale \gls{wfs} systems in the Electronic Music Studio \citep{website:tu-electronic_studio} and lecture hall H0103 \citep{website:tu-wfs} at \gls{tu-berlin} and a medium scale system at the Wave Field Synthesis Lab at HAW in Hamburg \citep{inbook:fohl2013}.\\ The included convolution engine fWonder is applied in “Assessing the Authenticity of Individual Dynamic Binaural Synthesis” \citep[pp. 223-246]{phdthesis:lindau2014}.\\ Unfortunately, the spatial audio renderer has not been actively maintained for several years. Hence it is limited to its two rendering algorithms and has many bugs, that are not likely to get fixed in the future.\\ \subsection{HoaLibrary (PureData extension)} \label{subsec:hoalibrary_puredata_extension} The HoaLibrary is “a collection of C++ and \gls{faust} classes and objects for Max, PureData and VST destined to high order ambisonics sound reproduction” \citep{website:hoalibrary}. 
By the extension for \gls{pd} \citep{inproceedings:puckette1997}, it enables for \gls{hoa} reproduction, while harnessing the rich feature set of the audio programming language still enables for implementing other forms of spatial rendering alongside the HoaLibrary.\\ \gls{pd} is \gls{osc} capable with the help of extensions, such as \textit{mrpeach}\footnote{ \href{https://puredata.info/downloads/mrpeach} {https://puredata.info/downloads/mrpeach}} or \textit{IEMnet}\footnote{ \href{https://puredata.info/downloads/iemnet} {https://puredata.info/downloads/iemnet}}.\\ \subsection{3Dj (SuperCollider Quark)} \label{subsec:3dj_supercollider_quark} 3Dj is a \gls{supercollider} \gls{quark} conceived in the course of a Master's Thesis at Universitat Pompeu Fabra, Barcelona \citep{mastersthesis:perezlopez2014} for interactive performance live spatialization purposes. It implements \gls{hoa} and \gls{vbap} rendering \citep[p 45]{mastersthesis:perezlopez2014} and uses a specific scene format \citep[pp. 45--46]{mastersthesis:perezlopez2014} to allow sound sources to have static, linear, random, brownian, simple harmonic and orbital motion.\\ Due to being a language extension to \gls{sclang}, 3Dj can be used in conjunction with other spatial rendering algorithms provided by \gls{supercollider} or any of its \glspl{quark}.\\ \gls{supercollider} is \gls{osc} enabled by default, which renders 3Dj a dynamically accessible solution. \subsection{WFSCollider} \label{subsec:wfscollider} WFSCollider was built on top of \href{https://supercollider.github.io}{SuperCollider} 3.5 \citep{website:supercollider} and as its name suggests, it is an application for \gls{wfs} reproduction. It “allows soundfiles, live input and synthesis processes to be placed in a score editor where start times, and durations can be set and trajectories or positions assigned to each event. 
It also allows realtime changement of parameters and on the fly starting and stopping of events via \gls{gui} or \gls{osc} control. Each event can be composed of varous objects (“units”) in a processing chain“ \citep{website:wfscollider}. According to its current manual, it is also capable of using a \gls{vbap} renderer for other multi-speaker setups \citep[p. 8]{manual:wfscollider}.\\ ”WFSCollider is the driving software of the Wave Field Synthesis system of the Game Of Life Foundation“ \citep{website:gameoflife}. In multi-computer setups, it can synchronize the involved processes and a dynamic latency can be introduced to account for high network throughput \citep[p. 22]{manual:wfscollider}. By nature WFSCollider is \gls{osc} capable and extendable by what \gls{sclang} has to offer. Its scores are saved as \gls{supercollider} code, as well.\\ It is currently only tested on macOS and is based upon a several year old version of \href{https://supercollider.github.io}{SuperCollider}. \subsection{SoundScape Renderer} \label{subsec:soundscaperenderer} The \gls{ssr}, written in C++, is a multi-purpose spatial audio renderer, that runs on Linux and macOS\@. Based on its underlying \gls{apf} \citep{inproceedings:geieretal2012}, it is able to use \gls{bs}, \gls{brs}, \gls{aap}, \gls{wfs}, \gls{nfc-hoa} and \gls{vbap}. However, all rendering algorithms with potentially orthogonal sound fields, are currently only available in 2D \citep{inproceedings:geieretal2008}.\\ It can be used with a \gls{qt4} based \gls{gui} or headless (without one), depicting the virtual sources, their volumes and positions. 
If a loudspeaker based renderer is chosen, the \gls{gui} also illustrates which speakers are currently used for rendering a selected source.\\ The \gls{ssr}, since its conception, had a history of conducting psychoacoustic experiments with it \citep{inproceedings:geierandspors2010}.\\ Current scientific research with the \gls{bs} and \gls{brs} renderers were done by \citet{mastersthesis:ackermannandilse2015}, \citet{mastersthesis:boehm2015} or \citet{mastersthesis:grigoriev2017}. The \gls{wfs} renderer has been improved by the work of several research papers, dealing with enhancements of spatial aliasing, active listening room and loudspeaker compensation and active noise control \citep{inproceedings:sporsetal2008} and analyzing and pre-equalizing in 2.5-dimensional \gls{wfs} \citep{inproceedings:sporsandahrens2008}. The loudspeaker based renderer was also used for psychoacoustic experiments, such as the one found in \citet{bachelorsthesis:koslowski2013}\\ The \gls{ssr} uses \gls{xml} based configuration files for reproduction (i.e.\ how something is played back) and scene (i.e.\ what is played back). The \gls{asdf} however is not (yet) able to represent dynamic setups.\\ The application can be controlled through a \gls{tcp}/\gls{ip} socket. \gls{osc} functionality can only be achieved using the capabilities of other applications such as \gls{pd} \citep{website:puredata2016} in combination with it.\\ Unlike \nameref{subsec:swonder} or \nameref{subsec:wfscollider}, the \gls{ssr} is not able to run medium or large-scale \gls{wfs} setups, as it lacks the features to communicate between instances of itself on several computers, while these instances serve a subset of the available loudspeakers. \subsection{Why Free Software Matters and What Its Pitfalls Are} \label{subsec:whyfreesoftwarematters} Free software is the terminology for software published under a free license. 
Licenses, such as the \gls{gpl}, are considered free, because they allow for anyone to copy, modify and redistribute the source code (under the same license).\\ Research is a field of work, in which reproducibility is very important, as findings need to be independently verifiable. Scientific work is published and shared (sometimes also under free licenses, such as \gls{cc}) amongst research groups of institutions all around the world. In an ideal world, all scientific research would be published under a free documentation license, such as the \gls{fdl}, allowing access to anyone, free of charge.\\ The software used in scientific institutions is unfortunately rarely free (e.g.\ word processing, statistics, mathematical calculations, realtime audio synthesis and audio production) and additionally mostly bound to proprietary \glspl{os}, such as Microsoft Windows or Apple's macOS, preventing interoperability, development and an open society.\\ However, free software enables students and researchers to learn from the source code directly (if they can and want to), to modify (and improve) it and to share their findings. More than with proprietary software, it is possible to have a community develop around it, that takes care of the project for a long time.\\ Free software nonetheless cannot be considered superior. It is after all only a way of developing software and not a way to grade its efficiency or code quality. Additionally it has to be noted, that especially in a scientific context it can happen, that software is conceived by an institution, put to use, but later lacks the developers to drive the project onwards (e.g.\ \nameref{subsec:swonder}). Therefore, a high responsibility lies with these institutions, as they need to ensure further development on systems, not easily accessible to the public, or not feasible for home use (e.g.\ \gls{wfs}). 
This situation however also holds a great opportunity for cooperation.\\ As the development of free and open-source software is driven by its users and its contributors, its main goal should be to build a large and dedicated community at some point. Only this way new features can be developed, while taking care of bugs in the already existing source code.\\ Extending a software's functionality and improving its usability, such as that of the \gls{ssr}, can therefore be seen as an important step towards a more diverse user base and in effect ensuring its further development. \cleardoublepage \section{Implementation} \label{sec:implementation} This section covers the implementation of a networking interface for the \gls{ssr} and the considerations leading to it. The application was chosen to be extended by an \gls{osc} based networking interface, because it runs on multiple \glspl{os}, offers a wide set of rendering algorithms (in various stages of completion) by using the \gls{apf} \citep{inproceedings:geieretal2012}, is used extensively in scientific research, has the future possibility to run medium and large scale \gls{wfs} setups and was still actively maintained by its creators at the time of writing.\\ Software, such as the HoaLibrary (see~\ref{subsec:hoalibrary_puredata_extension}) or 3Dj, (see~\ref{subsec:3dj_supercollider_quark}) were not considered, as they were too reliant on their environment (i.e.\ \gls{pd} or \gls{supercollider}) and only implemented a small set of spatial audio renderers, while sWONDER was additionally unmaintained for a long period of time (see~\ref{subsec:swonder}) and WFSCollider bound to a non-free operating system (see~\ref{subsec:wfscollider}). \subsection{Outline} \label{subsec:outline} Initially, the aim was to extend the \gls{ssr}'s features in the scope of creating a replacement for the aging sWONDER software, enabling it to run networked instances to drive a medium or large scale \gls{wfs} setup. 
However, the approach appeared too narrow, as the application offers many different rendering algorithms. A networking extension therefore would have to be available to all of them with an equal feature set. Additionally, extending a rendering framework by a networking feature, with the help of only one of its engines proved to be linked to a massive, but avoidable overhead (see~\ref{subsubsec:preliminaries}).\\ The \gls{ssr}, being a multi-purpose spatial audio renderer, can be used in diverse setup scenarios (see~\ref{subsubsec:setups}). Therefore not only classic server-client relationships (see~\ref{subsubsec:remote_controlling_a_server}), but also client-only and local (see~\ref{subsubsec:remote_controlling_a_client}) setups have to be taken into account. In addition, the case of medium and large scale loudspeaker based rendering setups and their specifics have to be considered (see~\ref{subsubsec:rendering_on_dedicated_speakers}). \subsubsection{Preliminaries} \label{subsubsec:preliminaries} In preparation to this work, an implementation of a side-by-side installation to the \gls{os} currently driving the \gls{wfs} system setup of the Electronic Studio at \gls{tu-berlin} \citep{website:tu-electronic_studio} was attempted for testing purposes.\\ Arch Linux \citep{website:archlinux} was installed and configured to run the medium scale setup. 
Unfortunately, the proprietary Dante \citep{website:audinate} driver for Linux, offered by Four Audio \citep{website:fouraudio}, creates non-trivial circumstances for using it on an up-to-date Linux kernel, due to \gls{alsa} \gls{api} changes not accounted for.\\ While the current \gls{os} --- an Ubuntu \citep{website:ubuntu} Studio 2012 \gls{lts} --- still runs well in its own parameters, its support has run out and it is therefore becoming harder, if not impossible, to build newer software on it, using newer versions of free software compilers.\\ For research purposes however, it is desirable to be able to try new kernel and software features on a regular basis. It is essential to find the most stable and secure setup possible involving realtime enabled kernels and building new versions of (spatialisation) software.\\ The hardware of the large scale setup at \gls{tu-berlin} in lecture hall H0104 was being updated and unusable at the time of writing. However, in the future it will become a valuable candidate for testing of the sought after \gls{ssr} features, as its setup involves no Dante network, but is instead run by several rendering computers connected to \gls{madi} and \gls{adat} lightpipe enabled speaker systems.\\ Although a \gls{wfs} setup for testing purposes was eligible, it is generally not required for implementing the features described in the following sections and subsections, as they can be tested using two machines running Linux, \gls{jack} and a development version of the \gls{ssr}.\\ \subsubsection{Remote Controlling a Server} \label{subsubsec:remote_controlling_a_server} An \gls{ssr} server instance in the notion of a medium to large scale reproduction setup is supposed to have a set of \textit{n} (pre-)assigned clients. Generally, controlling it should be possible through either \gls{udp} or \gls{tcp}. 
Every message sent to it should be distributed to all of its clients (if applicable), preferably using the same protocol used to communicate with the server. The messaging system should be flexible and scriptable.\\ All audio inputs available to the server should be available to its clients as well. A server instance should be able to render audio just as a client would. It should be able to receive messages from its clients and act upon them (e.g.\ updating \gls{gui} elements). \subsubsection{Remote Controlling Clients} \label{subsubsec:remote_controlling_a_client} An \gls{ssr} client can either be local (on the same machine) or somewhere on the same network, as the server or application controlling it. It should not make a difference, if the client instance is controlled by a server instance or any other application, implementing the messaging system it uses. A client should send an update to its server or the application controlling it, upon receiving a valid message. \subsubsection{Rendering on Dedicated Speakers} \label{subsubsec:rendering_on_dedicated_speakers} In a medium or large scale setup, \textit{n} clients collectively render audio on \textit{l} loudspeakers, while all should be using the same \textit{i} inputs and each have \textit{c} hardware outputs. \textit{l} is preferably a multiple of \textit{c}, but definitely larger than \textit{c}.\\ As the described setups usually have too many loudspeakers for only one machine (i.e.\ a client), a system has to be conceived, by which each client will only render on its specifically assigned subset of size \textit{c} of the \textit{l} loudspeakers. 
\subsection{Publisher/Subscriber Interface} \label{subsec:publisher_subscriber_interface} The \gls{ssr} internally uses a \gls{pubsub}, which is a design pattern to implement control through and over several parts of its components.\\ In \gls{oop}, \gls{pubsub} --- also called observer, listener messaging --- is usually comprised of a publisher class, handling the messages, without explicitly implementing how they will be used and a subscriber class, that allows for its implementations to subscribe to the messages provided. Filtering takes place to enable subscribers to only receive a certain subset of the messages.\\ The \gls{ssr} implements a content-based filtering system, in which each subscriber evaluates the messages received and acts depending on its own constraints to implement further actions upon it.\\ \begin{figure}[!htb] \centering \includegraphics[scale=0.6, trim = 6mm 130mm 12mm 4mm, clip] {ssr-publisher-with-legacy-subscribers.pdf} \caption[A diagram depicting a simplified version of the \gls{pubsub} used within the \gls{ssr} with all original subscribers.]{A diagram depicting a simplified version of the \gls{pubsub} used within the \gls{ssr} with all original subscribers.\\ {\color{pubsub-in}\textbf{--}} Calls from Publisher to Subscriber {\color{controller-in}\textbf{--}} Calls from Subscribers to Controller (Publisher) } \label{fig:ssr-publisher-with-legacy-subscribers} \end{figure} The abstract class \mintinline{c++}{Publisher} defines the messages possible to send and provides means to subscribe to them. 
The global \mintinline{c++}{Controller} class is its only implementation within the \gls{ssr}.\\ The abstract class \mintinline{c++}{Subscriber} in turn defines the messages understood, while its implementations in \mintinline{c++}{RenderSubscriber}, \mintinline{c++}{Scene}, \mintinline{c++}{OscSender} and \mintinline{c++}{NetworkSubscriber} take care of how they are used.\\ This system enables a versatile messaging layout, in which components can call the \mintinline{c++}{Publisher} functionality in \mintinline{c++}{Controller}, which in turn will send out messages to all of its subscribers.\\ Depending on the design of an application, \gls{pubsub} is not necessarily a one-way-road. As shown in Figure~\ref{fig:ssr-publisher-with-legacy-subscribers}, subscribers can also be able to call functions of the \mintinline{c++}{Publisher}, if the implementation permits it.\\ In the \gls{ssr} this is possible, because each \mintinline{c++}{Subscriber} holds a reference to the \mintinline{c++}{Controller} instance and is therefore able to call its public functions.\\ According to the principle of encapsulation in \gls{oop}, this type of functionality is handled by a separate class. In Figure~\ref{fig:ssr-publisher-with-all-subscribers}, the \mintinline{c++}{OscHandler} and \mintinline{c++}{Server} instances delegate calls to \mintinline{c++}{Controller} functionality to their utilities \mintinline{c++}{OscReceiver} and \mintinline{c++}{CommandParser} (respectively). \subsection{IP Interface} \label{subsec:ip-interface} The \gls{ssr} from early on incorporated a network interface, that accepts specially terminated \gls{xml}-formatted strings over a \gls{tcp} port, called “\gls{ip} interface”. 
This has the benefit of reusing the same \gls{xml} parser code in use for scene and reproduction description.\\ From the perspective of other available software, it is a downside though, that it is complicated to use, as a conversion to \gls{xml} has to be attempted before sending a message to the \gls{ssr}. Additionally, the message has to be linted (error checked) before sending and parsed again, after receiving an answer from the application.\\ The \gls{ip} interface achieves to offer more or less direct access to the \gls{pubsub} (see~\ref{subsec:publisher_subscriber_interface}). However, it has no notion of a networked setup and could therefore be described as a two-directional message system between two destinations. With it, only setups with up to \textit{n} clients are possible. \subsubsection{OSC through PureData} \label{subsubsec:osc_through_puredata} To allow \gls{osc} communication, the \gls{ssr} incorporates a Lua based \gls{pd} external. It uses two externals (\textit{IEMnet}\footnote{ \href{https://puredata.info/downloads/iemnet} {https://puredata.info/downloads/iemnet}} and \textit{pdlua}\footnote{\href{https://puredata.info/downloads/pdlua} {https://puredata.info/downloads/pdlua}}) alongside a Lua library for parsing and creating \gls{xml} (\textit{SLAXML}\footnote{\href{https://github.com/Phrogz/SLAXML} {https://github.com/Phrogz/SLAXML}}). \subsubsection{Sending and Receiving} \label{subsubsec:sending_and_receiving} As mentioned in section~\ref{subsec:publisher_subscriber_interface}, the \mintinline{c++}{NetworkSubscriber} class (part of the \gls{ip} interface) implements the \mintinline{c++}{Subscriber} interface. This implies that the network interface subscribes to the messages the \mintinline{c++}{Publisher} (the \mintinline{c++}{Controller} instance) has to offer. 
Every time a function of the \gls{ssr}'s \mintinline{c++}{Controller} instance, that was inherited from \mintinline{c++}{Publisher}, is called, it will issue the call on all of its subscribers, too. Every message available to the \gls{ssr}'s \nameref{subsec:ip-interface} is therefore directly bound to its \gls{pubsub} interface's set of functions.\\ \cleardoublepage \subsection{Open Sound Control Interface} \label{subsec:osc-interface} The networking interface conceived in the course of this work was developed in several branches, using the git version control system (written by Linus Torvalds, now maintained by Junio Hamano\footnote{\href{https://git-scm.com}{https://git-scm.com}}), publicly on Github\footnote{\href{https://github.com/dvzrv/ssr} {https://github.com/dvzrv/ssr}}. Internally the liblo library (further explained in~\ref{subsubsec:liblo}) was harnessed to implement \gls{osc} functionality (see~\ref{subsubsec:open-sound-control}) for the \gls{ssr}.\\ After initial conversations with the current maintainer Matthias Geier through the project's Github issue tracker\footnote{ \href{https://github.com/soundscaperenderer/ssr} {https://github.com/soundscaperenderer/ssr}}, different ideas were worked out to achieve a broad solution to the server-client and client-only setups and to get a better understanding of the underlying design. 
Initial attempts, such as the mapping of a networking setup in the scene description\footnote{ \href{https://github.com/dvzrv/ssr/tree/distributed\_reproduction} {https://github.com/dvzrv/ssr/tree/distributed\_reproduction}}, proved too restrictive though, as it would allow the networking functionality only to renderers, that use loudspeakers and mix scene description with networking description.\\ A nearly configuration-less approach, based on subscribing clients on sending poll messages to them proved more open (in the sense that it can be interfaced with by any \gls{osc}-capable application or programming language) and have less configuration overhead. With it, a diverse set of setups can be achieved (further described in~\ref{subsubsec:setups}), which at the same time remain dynamically configurable (using a plethora of \gls{osc} implementations) and debuggable using tests (further explored in~\ref{subsec:automated_tests}). \begin{figure}[!htb] \centering \includegraphics[scale=0.6, trim = 6mm 91mm 12mm 10mm, clip] {ssr-publisher-with-all-subscribers.pdf} \caption[A diagram depicting a simplified version of the \gls{pubsub} used within the \gls{ssr} with all subscribers.]{A diagram depicting a simplified version of the \gls{pubsub} used within the \gls{ssr} with all subscribers.\\ {\color{pubsub-in}\textbf{--}} Calls from \mintinline{c++}{Publisher} to \mintinline{c++}{Subscriber} {\color{controller-in}\textbf{--}} Calls from \mintinline{c++}{Subscribers} to \mintinline{c++}{Controller} (\mintinline{c++}{Publisher}) } \label{fig:ssr-publisher-with-all-subscribers} \end{figure} The main implementations of the interface, further described in the following subsections, can be found in the classes \mintinline{c++}{OscHandler} (handling the \gls{osc} server), \mintinline{c++}{OscReceiver} (handling incoming \gls{osc} messages and acting upon them in the context of the \gls{ssr} instance) and \mintinline{c++}{OscSender} (responsible for reacting to calls from the 
\gls{pubsub}, as defined in~\ref{subsec:publisher_subscriber_interface} and sending of \gls{osc} messages to clients and server).\\ The class \mintinline{c++}{OscClient} implements the representation of a client (or server) to the message interface. It holds information about the client's address and port, along with its \mintinline{c++}{MessageLevel} (a concept elaborated in~\ref{subsubsec:message_levels}) and its alive counter (used to check, whether a given client is still available on the network).\\ As shown in Figure~\ref{fig:ssr-publisher-with-all-subscribers}, the \mintinline{c++}{OscSender} is another implementation of the \mintinline{c++}{Subscriber} interface. This way, every call made through the \mintinline{c++}{Publisher} (i.e.\ the \mintinline{c++}{Controller}), will be made on the corresponding function in \mintinline{c++}{OscSender} as well. With \mintinline{c++}{OscReceiver} the \gls{osc} interface has direct access to the \mintinline{c++}{Controller} and can make calls to it, on receiving a message.\\ In its implementation approach the \gls{osc} interface follows that of the \gls{ip} interface (see~\ref{subsec:ip-interface}). However, it expands in creating a client-server architecture, controlled by message levels (further elaborated in~\ref{subsubsec:message_levels}), using a unified message interface (explained in~\ref{subsubsec:message_interface}).\\ \gls{ssr} client instances only evaluate messages of server instances they are subscribed to. Server instances only evaluate messages of client instances, that are subscribed to them. \subsubsection{Open Sound Control} \label{subsubsec:open-sound-control} \gls{osc} is an “open, transport-independent, message-based protocol developed for communication among computers, sound synthesizers, and other multimedia devices” \citep{website:oscv1.0} developed at the \gls{cnmat}. 
Its 1.0 specification was published by Matthew Wright in 2002 \citep{website:oscv1.0} and the protocol has found widespread implementations (as libraries) in several programming languages. Many free and closed audio and video related applications (e.g. Ardour \citep{website:ardour}, Max/MSP \citep{website:cycling74}, \gls{supercollider} \citep{website:supercollider}) make use of it.\\ \gls{osc}'s syntax is defined by several parts, which are discussed briefly in this section.\\ \begin{itemize} \item Atomic data types, which are also reflected in type tags (see Table~\ref{tab:ssr-osc-data-type-acronyms} for details) \item Address patterns (an \gls{osc}-string starting with a “/”) \item Type tag string (a string, beginning with a “,”, holding a set of type tags, describing a collection of atomic data types) \item Arguments, a set of binary representations of each argument \item Messages, consisting (in sequence) of an address pattern, a type tag string and \textit{n} \gls{osc} arguments. \item Bundles, consisting of a set of Messages. \item Packets, the unit of transmission (sent over \gls{udp} or \gls{tcp}), consisting of a message or a bundle. \end{itemize} According to the specification, applications sending \gls{osc} packets are considered a client and the ones receiving packets a server. 
Therefore, applications can both be client and server at the same time.\\ \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{2cm} | p{8cm} } \textbf{\gls{osc} type tag} & \textbf{Type} \\ \hline \texttt{i} & \mintinline{c++}{int32} \\ \texttt{f} & \mintinline{c++}{float32} \\ \texttt{s} & \gls{osc}-string \\ \texttt{b} & \gls{osc}-blob \\ \hline \hline \texttt{h} & 64 bit big-endian two's complement integer\\ \texttt{t} & \gls{osc}-timetag\\ \texttt{d} & 64 bit (“double”) IEEE 754 floating point number\\ \texttt{S} & Alternate type represented as an \gls{osc}-string (for example, for systems that differentiate “symbols” from “strings”)\\ \texttt{c} & An \gls{ascii} character, sent as 32 bits\\ \texttt{r} & 32 bit RGBA color\\ \texttt{m} & 4 byte \gls{midi} message. Bytes from MSB to LSB are: port id, status byte, data1, data2\\ \texttt{T} & True. No bytes are allocated in the argument data.\\ \texttt{F} & False. No bytes are allocated in the argument data.\\ \texttt{N} & Nil. No bytes are allocated in the argument data.\\ \texttt{I} & Infinitum. No bytes are allocated in the argument data.\\ \texttt{[} & Indicates the beginning of an array. The tags following are for data in the Array until a close brace tag is reached.\\ \texttt{]} & Indicates the end of an array.\\ \end{tabular} \caption{Acronyms (type tags) for atomic data types, used in \gls{osc} messages and bundles \citep{website:oscv1.0}.}\\ The first four types define the standard \gls{osc} type tags, which should be understood by all implementations. The remaining are non-standard types, that are implemented by most (e.g.\ liblo implements all but array and RGBA color type). \label{tab:ssr-osc-data-type-acronyms} \end{table} As shown in Table~\ref{tab:ssr-osc-data-type-acronyms}, only \mintinline{c++}{int32}, \mintinline{c++}{float32}, \gls{osc}-string and \gls{osc}-blob are considered standardized. 
However, most of the remaining non-standard types are implemented and used by many different clients. For implementing the \gls{ssr} \gls{osc} interface, described in subsection~\ref{subsubsec:liblo} --~\ref{subsubsec:workflow_examples}, it was necessary to use the non-standard types \textit{True} and \textit{False} alongside the standard-types. \subsubsection{Liblo} \label{subsubsec:liblo} Liblo \citep{website:liblo2017} is an implementation of the \gls{osc} protocol for \gls{posix} systems. It was initially developed by Steve Harris and is now actively maintained by Stephen Sinclair.\\ The library, written in C, offers a C++ abstraction layer and is released under the \gls{lgpl} v2.1 or greater. Additionally, there are wrappers for the Perl and \gls{python} programming languages.\\ Due to its long standing availability and usage in many small and large-scale software projects, alongside its fairly straightforward implementability, it was chosen as the candidate for establishing an \gls{osc} interface for the \gls{ssr}.\\ At the time of writing liblo's latest stable release (0.28) was issued on 27th January 2014. Many changes and improvements have been applied to the codebase since then. One of them is the implementation of a \mintinline{c++}{ServerThread} for the C++ abstraction layer, which runs a \mintinline{c++}{Server} instance on a separate thread automatically.\\ In programming, threads are a way to implement simultaneous and/or asynchronous execution of code. The liblo \mintinline{c++}{Server} class, at the core of the C++ side of the library, is responsible for assigning a network port to listen to for incoming messages, listening for messages, executing code on their arrival (i.e.\ callback handling) and sending messages to clients. Many applications using liblo use \gls{osc} only as a messaging system. This usually means, that such an application itself is not single-purpose and is busy computing something else most of the time. 
Therefore it makes sense to run a \mintinline{c++}{Server} instance on a separate background thread, to not interfere with the executional flow of the rest of the program.\\ The \mintinline{c++}{ServerThread} class is able to free its resources upon going out of scope (i.e.\ its resources are not used by any object or function anymore), known as \gls{raii}. For this reason, the latest development version, instead of the current stable version of liblo, was chosen for the implementation. \subsubsection{Starting the SSR} \label{subsubsec:starting-the-ssr} The \gls{ssr} can be started with a rendering engine preselected (an executable postfixed by the supported rendering algorithm is provided by the software bundle --- e.g.\ \textbf{ssr-wfs}) or by selecting one through the configuration file, when using the standard executable named \textbf{ssr}. This way, the following renderers become available: \gls{aap}, \gls{bs}, \gls{brs}, generic, \gls{nfc-hoa}, \gls{vbap} and \gls{wfs}.\\ Additional features can be activated with the help of several flags to the executables. The customized ones, belonging to the \gls{osc} interface will be discussed in the following subsections. More information on the interplay between \gls{osc} messaging and the \gls{pubsub} (see~\ref{subsec:publisher_subscriber_interface}) can be found in~\ref{subsubsec:message_interface}. \paragraph{Client Instance} \label{para:client-instance} By default the \gls{ssr} is started as an \gls{osc} client on network port 50001 and only allows using ephemeral ports (in the range 49152--65535), suggested by the \gls{iana} according to \citet{rfc6335}. 
As shown in Listing~\ref{lst:ssr-binaural-client-start}, it is possible to use a different port, by defining it with the help of the \textbf{-p} flag.\\ \begin{listing}[!htb] \begin{mdframed} \begin{minted}[fontsize=\footnotesize]{shell} ssr-binaural -p "50002" \end{minted} \end{mdframed} \caption{Starting the \gls{ssr} using the \gls{bs} renderer as an \gls{osc} client (default) on the non-standard port 50002.} \label{lst:ssr-binaural-client-start} \end{listing} Once started, the client instance waits to receive a poll message from a server instance (or an application, mimicking one), upon which it will subscribe to it. Only then is it possible for the server application to control the client instance to the full extent via \gls{osc}. \paragraph{Server Instance} \label{para:server-instance} With the help of the \textbf{-N} flag, it is possible to start the \gls{ssr} as an \gls{osc} server. Additionally, the flag can be used in a future extension of the networking interface (see~\ref{subsubsec:non-renderer}). In Listing~\ref{lst:ssr-binaural-server-start}, the flag \textbf{-C} is furthermore used to specify an initial client \gls{ip} and its port (the flag actually accepts a comma-separated list of \gls{ip}-port pairs).\\ The \textbf{-p} flag, for setting a specific port is also available, when starting a server instance. \begin{listing}[!htb] \begin{mdframed} \begin{minted}[fontsize=\footnotesize]{shell} ssr-aap -N "server" -C "127.0.0.1:50002" \end{minted} \end{mdframed} \caption{Starting the \gls{ssr} using the \gls{aap} renderer as an \gls{osc} server, with an initial client on localhost, port 50002 provided. } \label{lst:ssr-binaural-server-start} \end{listing} When the server instance starts, it instantly sends out periodic poll messages to all of its active clients. 
Clients provided by the \textbf{-C} flag are considered instantly active.\\ Additionally, it is possible for clients (\gls{ssr} client instances, or \gls{osc} capable applications) to subscribe to the server instance, or be subscribed to it by another client, using a message level system further explained in~\ref{subsubsec:message_levels}. Every valid \gls{osc} message sent to the server instance will be delegated to all of its clients upon evaluation, again according to the aforementioned message level system.\\ If a client instance has not answered the sent out poll message of a server 10 times, it is considered to be unavailable and will be deactivated. No messages will be sent to it anymore, until the client subscribes/is subscribed again. \paragraph{Verbosity} \label{para:verbosity} The \gls{ssr} can be started with several levels of verbosity. These are accessed by using the flag \textbf{-v}, up to three times (i.e.\ \textbf{-vvv}).\\ The higher the level of verbosity, the more messages will be printed by the application. This especially applies to the \gls{osc} interface part of the \gls{ssr}, as most incoming and outgoing messages will be printed to \gls{stdout} at a level of \textbf{-vv}. At a level of \textbf{-vvv}, additionally all incoming and outgoing messages, that are issued in very short intervals per default (see~\ref{subsubsec:message_levels} for details) will be printed. \subsubsection{Setups} \label{subsubsec:setups} The \gls{ssr} offers the possibility for many different \gls{osc} enabled client-server and client-only setups. They will be explained in the following subsections.\\ All examples provide audio input via a \gls{jack} client, which can be local (on each client's or server's host computer) or provided through external audio inputs from another host computer (e.g.\ through \gls{adat} or \gls{madi}). 
However, this is not mandatory, as the \gls{ssr} is capable of playing back audio files directly.\\ The differences between server and client messaging are further elaborated in~\ref{subsubsec:message_interface}.\\ A special networked setup, in which the server instance is not rendering any audio, is discussed in~\ref{subsubsec:non-renderer}. \paragraph{Client-Server, Shared Rendering} \label{para:client_server_shared_rendering} In Figure~\ref{fig:ssr-client-server-shared-output}, the setup shows \textit{1} to \textit{n} client instances, controlled by a server instance. All instances are receiving audio from an external \gls{jack} client or from reading local files. Collectively, the \textit{n} clients and the server are rendering audio on a shared output system (e.g.\ \gls{wfs} or \gls{hoa}).\\ The server instance is controlled through its \gls{gui}, sends out \gls{osc} messages to all \textit{n} clients and receives their updated information (again through \gls{osc}). \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-client-server-shared-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio collectively (e.g.\ \gls{wfs}). The server instance is not controlled via \gls{osc}, but controls its clients through it.]{A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio collectively (e.g.\ \gls{wfs}). 
The server instance is not controlled via \gls{osc}, but controls its clients through it.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-client-server-shared-output} \end{figure} \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-external-client-server-shared-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio collectively (e.g. \gls{wfs}). The server instance is controlled by an \gls{osc} capable application (acting as another client) and controls its clients through \gls{osc} as well.]{A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio collectively (e.g. \gls{wfs}). The server instance is controlled by an \gls{osc} capable application (acting as another client) and controls its clients through \gls{osc} as well.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-external-client-server-shared-output} \end{figure} The setup shown in Figure~\ref{fig:ssr-external-client-server-shared-output} is similar to the previous one, with the exception, that the server instance is controlled by an external \gls{osc} capable application. This way, the server instance can also be run headless (without a \gls{gui}).\\ The set of \textit{n} clients report back to the server instance, which in turn reports back to the \gls{osc} enabled application (acting as another client). 
\cleardoublepage \paragraph{Client-Server, Separate Rendering} \label{para:client_server_separate_rendering} As shown in Figure~\ref{fig:ssr-client-server-separate-output}, it is possible to have a setup, in which, similar to the one described in Figure~\ref{fig:ssr-client-server-shared-output}, server and \textit{n} clients render the same sources, but on separate output systems (e.g.\ several \gls{bs}/\gls{brs} renderers or even a mixture of a \gls{wfs}/\gls{hoa} system and several \gls{bs}/\gls{brs} renderers).\\ \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-client-server-separate-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio to separate outputs (e.g.\ multiple \gls{bs} renderers). The server instance is not controlled via \gls{osc}, but controls its clients through it.]{A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio to separate outputs (e.g.\ multiple \gls{bs} renderers). The server instance is not controlled via \gls{osc}, but controls its clients through it.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-client-server-separate-output} \end{figure} Figure~\ref{fig:ssr-external-client-server-separate-output} is an example of a similar setup, but again using an external \gls{osc} capable application to control the server instance.\\ \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-external-client-server-separate-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio separately (e.g.\ multiple \gls{bs} renderers). 
The server instance is controlled by an \gls{osc} capable application (acting as another client) and controls its clients through \gls{osc} as well.]{A diagram displaying an \gls{ssr} client/server setup, in which the server and the clients render audio separately (e.g.\ multiple \gls{bs} renderers). The server instance is controlled by an \gls{osc} capable application (acting as another client) and controls its clients through \gls{osc} as well.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-external-client-server-separate-output} \end{figure} \cleardoublepage \paragraph{Clients Only} \label{para:clients_only} Using the new \gls{osc} interface, it is also possible to have client-only setups, in which an \gls{osc} capable application mimics an \gls{ssr} server. This way a set of \textit{n} clients can collectively (see Figure~\ref{fig:ssr-external-clients-only-shared-output}) or separately (see Figure~\ref{fig:ssr-external-clients-only-separate-output}) render audio, without the specific need of an \gls{ssr} server instance controlling them. The clients send their update information back to the controlling application.\\ Much of the functionality implemented in the server-side of the \gls{osc} interface however has to be reapplied to the controlling software and its behavior, when dealing with \gls{ssr} clients. \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-external-clients-only-shared-output.pdf} \caption[A diagram displaying an \gls{ssr} client cluster setup, in which a set of clients render audio collectively (e.g.\ medium or large-scale \gls{wfs} setup). 
An \gls{osc} capable application acts as an \gls{ssr} server instance and controls the clients.]{A diagram displaying an \gls{ssr} client cluster setup, in which a set of clients render audio collectively (e.g.\ medium or large-scale \gls{wfs} setup). An \gls{osc} capable application acts as an \gls{ssr} server instance and controls the clients.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-external-clients-only-shared-output} \end{figure} \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-external-clients-only-separate-output.pdf} \caption[A diagram displaying an \gls{ssr} client cluster setup, in which a set of clients render audio separately (e.g.\ multiple \gls{bs} renderers). An \gls{osc} capable application acts as an \gls{ssr} server instance and controls the clients.]{A diagram displaying an \gls{ssr} client cluster setup, in which a set of clients render audio separately (e.g.\ multiple \gls{bs} renderers).
An \gls{osc} capable application acts as an \gls{ssr} server instance and controls the clients.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-external-clients-only-separate-output} \end{figure} \cleardoublepage \subsubsection{Message Levels} \label{subsubsec:message_levels} To be able to distinguish between types of clients and servers, several message levels were implemented for the \gls{osc} interface conceived in the course of this work.\\ The \mintinline{c++}{enum class MessageLevel} (see Listing~\ref{lst:ssr_global.h}) defines the four types \mintinline{c++}{CLIENT}, \mintinline{c++}{GUI_CLIENT}, \mintinline{c++}{SERVER}, \mintinline{c++}{GUI_SERVER}, which are represented as non-negative integers (in ascending order), starting from 0.\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=54, lastline=61, fontsize=\footnotesize]{c++}{../../ssr/src/ssr_global.h} \end{mdframed} \caption{src/ssr\_global.h: \mintinline{c++}{enum class MessageLevel}} \label{lst:ssr_global.h} \end{listing}\\ \noindent\gls{ssr} client instances subscribe to \gls{ssr} server instances with the \mintinline{c++}{MessageLevel} \mintinline{c++}{CLIENT} by default. Server instances get the \mintinline{c++}{MessageLevel} \mintinline{c++}{SERVER} assigned by each client on subscribing to it.\\ In the \gls{osc} interface it is implemented as follows: A (recyclable and reconfigurable) list of clients is held by a server instance, which allows the \mintinline{c++}{MessageLevel} to be changed for each client.
Every client instance holds a (reconfigurable) server representation, which allows the \mintinline{c++}{MessageLevel} to be changed for each client towards its server.\\ Several messages, such as information related to \gls{cpu} load or master signal level, are not useful for a rendering client (additionally they are issued in very short intervals, which can lead to performance issues), which is why they are only sent to clients with a \mintinline{c++}{MessageLevel} \mintinline{c++}{GUI_CLIENT} or servers with a \mintinline{c++}{MessageLevel} \mintinline{c++}{GUI_SERVER}.\\ Lightweight \gls{osc} capable applications used to control an \gls{ssr} server instance are clients to said server instance. An elevated \mintinline{c++}{MessageLevel} of \mintinline{c++}{SERVER} (instead of \mintinline{c++}{CLIENT}) enables them to send messages to the server and have them evaluated.\\ Analogous to a server instance holding a \mintinline{c++}{MessageLevel} of \mintinline{c++}{GUI_SERVER} towards its clients, a client instance can hold the same \mintinline{c++}{MessageLevel} towards a server instance to receive the above-mentioned performance-heavy \gls{osc} messages. How message levels are set up is further elaborated in the following section. \subsubsection{Message Interface} \label{subsubsec:message_interface} \gls{osc} offers the possibility of a hierarchical path tree that can be used to group messages by type (i.e.\ context). In conjunction with messages only understood by client or server (or a context dependent meaning), most of the messages understood by the \gls{ip} interface (see~\ref{subsec:ip-interface}) are implemented.
Additional features, related to server-client and client-only functionality, were integrated as well.\\ In general, a distinction can be made between \textit{direct} messages --- sent from a server (or an application mimicking one) to a client or a server to trigger processing (see Table~\ref{tab:ssr-osc-processing-tracker-transport}), reference (see Table~\ref{tab:ssr-osc-reference}), scene (see Table~\ref{tab:ssr-osc-scene}), source (see Table~\ref{tab:ssr-osc-source}), tracker (see Table~\ref{tab:ssr-osc-processing-tracker-transport}) or transport (see Table~\ref{tab:ssr-osc-processing-tracker-transport}) related operations in the \gls{ssr} --- and \textit{update} messages (see Table~\ref{tab:ssr-osc-update}) --- sent from a client to a server upon successfully processing an operation related to a \textit{direct} message.\\ \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{2cm} | p{1cm} | p{3.5cm} | p{3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example}\\ \hline \texttt{/alive} & & Alive notification from client (in response to a /poll) & \texttt{[/alive]} \\ \texttt{/message\_level} & i & Set message level of sender & \texttt{[/message\_level, 1]} \\ \texttt{/message\_level} & ssi & Set message level of a specific client & \texttt{[/message\_level, “127.0.0.1”, “50002”, 1]} \\ \texttt{/subscribe} & F & Unsubscribe sender & \texttt{[/subscribe, false]} \\ \texttt{/subscribe} & Fss & Unsubscribe specific client & \texttt{[/subscribe, false, “127.0.0.1”, “50002”]}\\ \texttt{/subscribe} & T & Subscribe sender & \texttt{[/subscribe, true]} \\ \texttt{/subscribe} & Ti & Subscribe sender with specific message level & \texttt{[/subscribe, true, 1]} \\ \texttt{/subscribe} & Tssi & Subscribe specific client with specific message level & \texttt{[/subscribe, true, “127.0.0.1”, “50002”, 1]} \\ \end{tabular} \caption{\gls{osc} messages relevant for
subscribing and setting of message levels for clients.\\ } \caption*{Understood by server.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. } \label{tab:ssr-osc-subscribe} \end{table} \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{2cm} | p{1cm} | p{3.5cm} | p{3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example}\\ \hline \texttt{/message\_level} & i & Set message level of sender (the server) & \texttt{[/message\_level, 1]} \\ \texttt{/poll} & & Poll client (continuously sent), triggering subscribe (see Table~\ref{tab:ssr-osc-subscribe}) & \texttt{[/poll]} \\ \end{tabular} \caption{\gls{osc} messages relevant for polling and setting of message levels for servers subscribed to.\\ } \caption*{Understood by client.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. } \label{tab:ssr-osc-client-poll-message-level} \end{table} A special set of \textit{direct} messages are the \textit{subscribe} and \textit{message level} (see Table~\ref{tab:ssr-osc-subscribe}) and \textit{poll} and \textit{message level} (see Table~\ref{tab:ssr-osc-client-poll-message-level}) messages. The former --- understood only by \gls{ssr} server instances --- enable clients to subscribe (with a certain message level) or subscribe other clients (with a predefined message level) and set their own message level or that of another client. The latter set --- only understood by clients --- enables servers (or applications mimicking one) to poll yet unsubscribed clients to have them subscribe and subscribed clients to reply with an alive message.
Similar to the \textit{message level} message understood by server instances, the one understood by clients sets the message level (of the server representation in the client).\\ \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{3cm} | p{1.2cm} | p{3.5cm} | p{3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example}\\ \hline \texttt{/processing/state} & F & Unset processing state & \texttt{[/processing/state, false]} \\ \texttt{/processing/state} & T & Set processing state & \texttt{[/processing/state, true]} \\ \texttt{/tracker/reset} & & Reset tracker & \texttt{[/tracker/reset]} \\ \texttt{/transport/rewind} & & Rewind the \gls{jack} transport & \texttt{[/transport/rewind]} \\ \texttt{/transport/seek} & s & Seek to time code in \gls{jack} transport & \texttt{[/transport/seek, “42:00:00”]} \\ \texttt{/transport/state} & F & Unset \gls{jack} transport state & \texttt{[/transport/state, false]} \\ \texttt{/transport/state} & T & Set \gls{jack} transport state & \texttt{[/transport/state, true]} \\ \end{tabular} \caption{\gls{osc} messages relevant for processing, tracker and (\gls{jack}) transport related settings.\\ } \caption*{Understood by server and client.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. 
} \label{tab:ssr-osc-processing-tracker-transport} \end{table} \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{4.3cm} | p{1cm} | p{2.5cm} | p{4.4cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example}\\ \hline \texttt{/reference/orientation} & f & Set azimuth of reference point & \texttt{[/reference/orientation, -90.0]} \\ \texttt{/reference/position} & ff & Set position of reference & \texttt{[/reference/position, 1.5, 2.0]} \\ \texttt{/reference\_offset/orientation} & f & Set azimuth of reference offset position & \texttt{[/reference\_offset/orientation, -90.0]} \\ \texttt{/reference\_offset/position} & ff & Set position of reference offset & \texttt{[/reference\_offset/position, 1.5, 2.0]}\\ \end{tabular} \caption{\gls{osc} messages relevant for reference management.\\ } \caption*{Understood by server and client.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. } \label{tab:ssr-osc-reference} \end{table} \begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{4.5cm} | p{0.9cm} | p{2.5cm} | p{4.3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example} \\ \hline \texttt{/scene/amplitude\_reference \_distance} & f & Set amplitude reference distance. & \texttt{[/scene/amplitude\_reference \_distance, 6.0]}\\ \texttt{/scene/auto\_rotate\_sources} & F & Disable automatic rotation of sources. & \texttt{[/scene/auto\_rotate\_sources, false]}\\ \texttt{/scene/auto\_rotate\_sources} & T & Enable automatic rotation of sources. & \texttt{[/scene/auto\_rotate\_sources, true]}\\ \texttt{/scene/clear} & & Delete all sources & \texttt{[/scene/clear]}\\ \texttt{/scene/decay\_exponent} & f & Set amplitude decay exponent in virtual space ($1/r^{exp}$). 
& \texttt{[/scene/decay\_exponent, 2.0]}\\ \texttt{/scene/load} & s & Load scene from \gls{asdf} file. & \texttt{[/scene/load, “example.asd”]}\\ \texttt{/scene/master\_signal\_level} & f & Set the renderer's signal level. & \texttt{[/scene/master\_signal\_level, -20]}\\ \texttt{/scene/save} & s & Save scene to \gls{asdf} file. & \texttt{[/scene/save, “example.asd”]}\\ \texttt{/scene/volume} & f & Set scene master volume. & \texttt{[/scene/volume, 0.23]}\\ \end{tabular} \caption{\gls{osc} messages relevant for scene management.\\ } \caption*{Understood by server and client.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. } \label{tab:ssr-osc-scene} \end{table} Once started, an \gls{ssr} server instance (see~\ref{para:server-instance}) responds to the messages shown in Tables~\ref{tab:ssr-osc-subscribe},~\ref{tab:ssr-osc-scene},~\ref{tab:ssr-osc-source},~\ref{tab:ssr-osc-processing-tracker-transport},~\ref{tab:ssr-osc-reference} and~\ref{tab:ssr-osc-update}.
\begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{3.5cm} | p{1.5cm} | p{2.5cm} | p{4.3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} & \textbf{Example} \\ \hline \texttt{/source/delete} & i & Delete source with given id & \texttt{[/source/delete, 1]}\\ \texttt{/source/file\_channel} & ii & Set a source's file channel & \texttt{[/source/file\_channel, 1, 2]}\\ \texttt{/source/file\_name\_ or\_port\_number} & is & Set a source's file name or port number & \texttt{[/source/file\_name\_ or\_port\_number, 1, “1”]}\\ \texttt{/source/port\_name } & is & Set a source's \gls{jack} input port name & \texttt{[/source/port\_name, 1, “system:capture\_2”]}\\ \texttt{/source/gain} & if & Set a source's gain on a linear scale (0.0 - inf) & \texttt{[/source/gain, 1, 0.2]}\\ \texttt{/source/model} & is & Set a source's model & \texttt{[/source/model, 1, “point”]}\\ \texttt{/source/mute} & iF & Unmute a source & \texttt{[/source/mute, 1, false]}\\ \texttt{/source/mute} & iT & Mute a source & \texttt{[/source/mute, 1, true]}\\ \texttt{/source/name} & is & Set a source's name & \texttt{[/source/name, 1, “Daisy”]}\\ \texttt{/source/new} & i & Create a new source stub using id & \texttt{[/source/new, 1]}\\ \texttt{/source/new} & sssffffTFF & Create a new source (auto-generated id) with name, model, port number, X-coordinate, Y-coordinate, orientation, gain, movability, orientation movability and mute status & \texttt{[/source/new, “Daisy”, “point”, “1”, 1.0, 2.5, 90.0, 0.2, true, false, false]} \\ \texttt{/source/new} & sssffffisTFF & Create a new source (auto-generated id) with name, model, port number, X-coordinate, Y-coordinate, orientation, gain, file channel, properties file, movability, orientation movability and mute status & \texttt{[/source/new, “Daisy”, “point”, “1”, 1.0, 2.5, 90.0, 0.2, 2, “properties.xml”, true, false, false]} \\ \texttt{/source/orientation} & 
if & Set a source's orientation & \texttt{[/source/orientation, 1, -90.0]}\\ \texttt{/source/position} & iff & Set a source's position & \texttt{[/source/position, 1, 1.5, 2.0]}\\ \texttt{/source/position\_fixed} & iF & Set a source movable & \texttt{[/source/position\_fixed, 1, false]}\\ \texttt{/source/position\_fixed} & iT & Set a source immovable & \texttt{[/source/position\_fixed, 1, true]}\\ \texttt{/source/properties\_file} & is & Set a source's properties file & \texttt{[/source/properties\_file, 1, “source-properties.xml”]}\\ \end{tabular} \caption{\gls{osc} messages relevant for source management.\\ } \caption*{Understood by server and client.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. } \label{tab:ssr-osc-source} \end{table} \noindent A client instance (see~\ref{para:client-instance}) will only respond to the \textit{direct} messages listed in Tables~\ref{tab:ssr-osc-client-poll-message-level},~\ref{tab:ssr-osc-scene},~\ref{tab:ssr-osc-source},~\ref{tab:ssr-osc-processing-tracker-transport} and~\ref{tab:ssr-osc-reference}, but is able to send \textit{update} messages.\\ There is one significant difference between the \textit{direct} messages understood by the \gls{osc} interface and the functionality of the \gls{ip} interface. The latter expects source gain to be transmitted on a logarithmic scale, ranging from \textit{-inf} to \textit{inf}. However, the \gls{ssr} is internally calculating on a linear scale and a linear gain level of \textit{0} is therefore hard to reach\footnote{ \href{https://github.com/SoundScapeRenderer/ssr/issues/28} {https://github.com/SoundScapeRenderer/ssr/issues/28}}. For a more intuitive use, a linear scale was chosen for the \gls{osc} interface, ranging from \textit{0.0} to \textit{inf} (see gain-related messages in Table~\ref{tab:ssr-osc-source}), where \textit{1.0} signifies 100\% source level.
\begin{table}[!htb] \renewcommand{\arraystretch}{1.2} \scriptsize \centering \rowcolors{2}{table-background-one}{table-background-two} \begin{tabular}{ p{6.5cm} | p{0.9cm} | p{4.3cm} } \textbf{Path} & \textbf{Types} & \textbf{Description} \\ \hline \texttt{/update/cpu\_load} & f & \gls{cpu} load changes.\\ \texttt{/update/processing/state} & T & Processing state is set.\\ \texttt{/update/processing/state} & F & Processing state is unset.\\ \texttt{/update/reference/orientation} & f & Reference orientation changes.\\ \texttt{/update/reference/position} & ff & Reference position changes.\\ \texttt{/update/reference\_offset/orientation} & f & Reference offset orientation changes.\\ \texttt{/update/reference\_offset/position} & ff & Reference offset position changes.\\ \texttt{/update/scene/amplitude\_reference\_distance} & f & Amplitude reference distance changes.\\ \texttt{/update/scene/auto\_rotate\_sources} & T & Auto rotation of sources is set.\\ \texttt{/update/scene/auto\_rotate\_sources} & F & Auto rotation of sources is unset.\\ \texttt{/update/scene/decay\_exponent} & f & The scene's decay exponent has changed.\\ \texttt{/update/scene/master\_signal\_level} & f & Master signal level has changed.\\ \texttt{/update/scene/sample\_rate} & i & Sample rate of the scene changed.\\ \texttt{/update/scene/volume} & f & Volume of the scene has changed.\\ \texttt{/update/source/delete} & i & A source with given id was deleted. 
\\ \texttt{/update/source/file\_channel} & ii & A source's file channel was set.\\ \texttt{/update/source/file\_name\_or\_port\_number} & is & A source's file name or port number was set.\\ \texttt{/update/source/gain} & if & A source's gain was set.\\ \texttt{/update/source/length} & ii & A source's length was set.\\ \texttt{/update/source/level} & if & A source's output level has changed.\\ \texttt{/update/source/model} & is & A source's model was set.\\ \texttt{/update/source/mute} & iF & A source was unmuted.\\ \texttt{/update/source/mute} & iT & A source was muted.\\ \texttt{/update/source/name} & is & A source's name was set.\\ \texttt{/update/source/orientation} & if & A source's orientation was set. \\ \texttt{/update/source/new} & i & A new source with given id was created. \\ \texttt{/update/source/port\_name} & is & A source's \gls{jack} port\_name was set. \\ \texttt{/update/source/position} & iff & A source's position was set. \\ \texttt{/update/source/position\_fixed} & iF & A source was set to be movable. \\ \texttt{/update/source/position\_fixed} & iT & A source was set to be immovable. \\ \texttt{/update/source/properties\_file} & is & A source's properties\_file was set.\\ \texttt{/update/transport/seek} & s & \gls{jack} transport seeked to a timecode position.\\ \texttt{/update/transport/state} & F & \gls{jack} transport was stopped.\\ \texttt{/update/transport/state} & T & \gls{jack} transport was started.\\ \end{tabular} \caption{\gls{osc} messages for updating information on \gls{cpu} load, processing, reference, scene, source, and transport of clients on a server.\\ No examples are given, as they are mostly analogous to the ones in Table~\ref{tab:ssr-osc-processing-tracker-transport} ,~\ref{tab:ssr-osc-scene} and~\ref{tab:ssr-osc-source}. } \caption*{Understood by server.\\ Data types and their acronyms are listed in Table~\ref{tab:ssr-osc-data-type-acronyms}. 
} \label{tab:ssr-osc-update} \end{table} \cleardoublepage \subsubsection{Workflow Examples} \label{subsubsec:workflow_examples} Using any \gls{osc} capable programming language or application enables communication with the \gls{ssr}. The following examples illustrate simple workflows using \gls{sclang} and should therefore be \gls{os} agnostic.\\ \paragraph{Controlling a Server} \label{para:controlling_a_server} \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=3, lastline=29, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/workflows.scd} \end{mdframed} \caption{supercollider/workflows.scd: \gls{sclang} as client controlling an \gls{ssr} server instance} \label{lst:ssr-workflow-sclang-controls-server} \end{listing}\\ As shown in Listing~\ref{lst:ssr-workflow-sclang-controls-server}, it is necessary to subscribe to the server instance with a \mintinline{c++}{MessageLevel} of \mintinline{c++}{SERVER} or higher.\\ After doing so, all \textit{direct} \gls{osc} messages (i.e.\ Tables~\ref{tab:ssr-osc-source},~\ref{tab:ssr-osc-scene},~\ref{tab:ssr-osc-reference},~\ref{tab:ssr-osc-subscribe}) are evaluated when sent to the \gls{ssr}.\\ The server instance will relay valid messages to all of its active clients.
\paragraph{Server Mimicry} \label{para:server_mimicry} \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=31, lastline=61, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/workflows.scd} \end{mdframed} \caption{supercollider/workflows.scd: \gls{sclang} mimics server, controlling an \gls{ssr} client instance} \label{lst:ssr-workflow-sclang-is-server-controls-client} \end{listing}\\ When mimicking an \gls{ssr} server instance in a client-only setup (e.g.\ Figure~\ref{fig:ssr-external-clients-only-shared-output} or Figure~\ref{fig:ssr-external-clients-only-separate-output}), it is necessary to send a poll message to the client instance to make it subscribe (which sets the server's address and port up internally).\\ Afterwards --- similar to the example in the subsection~\ref{para:clients_only} --- all \textit{direct} \gls{osc} messages are accepted by the client instance, when coming from the server address and port.\\ An interesting concept here is to (temporarily) set a different \mintinline{c++}{MessageLevel} for the application acting as a server (e.g.\ to \mintinline{c++}{GUI_SERVER}), to receive \gls{gui} relevant messages, as explained in~\ref{subsubsec:message_interface}.\\ \cleardoublepage \section{Discussion} \label{sec:discussion} The \gls{osc} based networking extension created for the \gls{ssr} can be considered a valuable usability improvement. Its implemented features are further discussed in the following section, followed by an outlook on related future work. The extension is additionally extensively documented in the source code, to ensure the ease of further development.\\ Due to the versatility of how the \gls{ssr} can be used in a networking context, it is likely, that some of its possibilities are not even accounted for. 
\subsection{Implemented Features} \label{subsec:implemented_features} The \gls{osc} interface described in~\ref{subsec:osc-interface} can be seen as a full replacement (with one minor exception, detailed in~\ref{subsubsec:scene_transfer}) for the \gls{ip} interface, already in place. Its additional features are what set it truly apart though, when not only regarding non-reliance on external software to enable \gls{osc} capabilities.\\ The implementation follows the internal \gls{pubsub} interface, as described in~\ref{subsec:publisher_subscriber_interface} and extends it, where appropriate. Additionally, an open client-server architecture has been created, according to a message level system, further elaborated in~\ref{subsubsec:message_levels}. An attempt at giving extensive examples on the various setup possibilities, that are now available, is made in~\ref{subsubsec:setups}, some of which are still dependent on various missing features (see~\ref{subsec:future_work}).\\ The \gls{osc} messaging system is adhering to the aforementioned client-server architecture by distinguishing between client-only, server-only and messages available to clients and servers alike (see~\ref{subsubsec:message_interface}). Examples for different workflows are given in~\ref{subsubsec:workflow_examples} to illustrate simple use cases.\\ This puts the \gls{osc} interface in the unique position of providing a native messaging interface and a flexible architecture.
It can be used from single local instances up to large scale networked setups (with the limitations discussed in~\ref{subsubsec:alien_loudspeaker} and~\ref{subsubsec:assigning_in_and_outputs_on_the_fly}).\\ While with the \gls{ip} interface, multiple instances are only controllable by using an \gls{osc} capable application or one, that is able to send \gls{xml}-formatted strings over \gls{tcp}/\gls{ip}, the \gls{osc} interface can deal with \textit{n} clients natively, while only one instance has to be controlled using \gls{osc}. The behavior implies, that setups are possible, in which a large collection of different types of renderers can share the same scene, which is particularly useful for e.g.\ auditioning different rendering algorithms over the same system, or rather in the same room.\\ Message sending takes place over \gls{udp}, instead of \gls{tcp}, which lowers the complexity of the network topology (\gls{udp} does not perform handshakes for every packet sent, unlike \gls{tcp}) and thus the size of each message sent.\\ The \gls{osc} interface therefore implements messaging, while using lower bandwidth and offering a greater feature set. In~\ref{subsec:automated_tests} a test environment is introduced, that further elaborates the overall functionality and feasibility of the message interface. \subsection{Automated Tests} \label{subsec:automated_tests} The \gls{ssr} was developed without the help of a test framework, which is responsible for testing its components, after they have been changed. This means, that internal (e.g.\ the \gls{pubsub} interface) or external (e.g.\ the \gls{ip} or \gls{osc} interface) functionality might or might not work as expected. To test the \gls{osc} interface's logical coherency and robustness automatically, a set of tests was written in \gls{sclang}.\\ The tests are divided into those probing robustness of the \gls{osc} interface and others probing its functionality. 
The robustness tests further divide into server and client specific tests, where authorized and unauthorized access is tried. The functionality tests are grouped by tests for general operability, i.e.\ testing certain features or workflows once and long-running tests, where features are tried repeatedly.\\ \subsubsection{Robustness} \label{subsubsec:robustness} Listing~\ref{lst:ssr-tests-sclang-unsubscribed-controls-server} and~\ref{lst:ssr-tests-sclang-subscribed-controls-server} describe server-side tests for robustness. While the first test will not lead to any processed action by the server, the latter will. This is explained by \gls{sclang} not being a subscribed client with a \mintinline{c++}{MessageLevel} of \mintinline{c++}{SERVER} or higher in the first case. However, in the second test \gls{sclang} subscribes to the \gls{ssr} server instance, which is why the \gls{osc} messages are evaluated in this case.\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=145, lastline=158, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/tests.scd} \end{mdframed} \caption{supercollider/tests.scd: \gls{sclang} (unsubscribed) tries to control an \gls{ssr} server} \label{lst:ssr-tests-sclang-unsubscribed-controls-server} \end{listing}\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=160, lastline=183, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/tests.scd} \end{mdframed} \caption{supercollider/tests.scd: \gls{sclang} (subscribed) tries to control an \gls{ssr} server} \label{lst:ssr-tests-sclang-subscribed-controls-server} \end{listing}\\ The tests described in Listing~\ref{lst:ssr-tests-sclang-controls-client-unpolled} and~\ref{lst:ssr-tests-sclang-controls-client-polled} are client-side tests for robustness, that work in a similar fashion to the aforementioned server-side tests. 
While the sent \gls{osc} messages are not evaluated in the first case, because \gls{sclang}, mimicking a server instance (see~\ref{para:server_mimicry}), did not poll the \gls{ssr} client instance up front, in the second case the messages are evaluated, because it did poll the client first. \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=185, lastline=197, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/tests.scd} \end{mdframed} \caption{supercollider/tests.scd: \gls{sclang} tries to control an \gls{ssr} client (without polling it)} \label{lst:ssr-tests-sclang-controls-client-unpolled} \end{listing}\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=199, lastline=214, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/tests.scd} \end{mdframed} \caption{supercollider/tests.scd: \gls{sclang} tries to control an \gls{ssr} client (with previously polling it)} \label{lst:ssr-tests-sclang-controls-client-polled} \end{listing}\\ In all tests for robustness the attempt is made to force errors in the implementation of the message interface (as defined in~\ref{subsubsec:message_interface}). 
This is achieved by purposely using ranges of data types for messages, that are not allowed or not defined in the \gls{ssr}'s internal implementation.\\ Two examples for weak spot exploitations were the use of negative integers for \glspl{id} in source related messages (only non-zero, non-negative \glspl{id} are allowed internally) or supplying an empty string as hostname or port number for subscription messages.\\ The first example will lead to undefined behavior, if the range is not checked in the implementation, because a \textit{static\_cast} is used internally to cast the value of the message data type (\textit{unsigned int}) to the one expected by the \gls{ssr}'s Controller implementation (\textit{signed int}) and the outcome of said operation is implementation dependent (depending on the \gls{os} in use).\\ The second example, if not checked for empty string, will lead to the \gls{osc} interface trying to create a possibly defective address and send poll messages out to it.\\ While only some of the above-mentioned scenarios could lead to a crash of the program under certain circumstances, left unhandled, all of them waste resources, which is undesired. To circumvent possibly harmful input using the \gls{osc} interface, a set of sanity checks was implemented, that only allow for a received message to be processed, if all of its components fit the requirements.
\subsubsection{Functionality and Operability} \label{subsubsec:functionality_and_operability} \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=216, lastline=225, fontsize=\footnotesize]{supercollider} {../../ssr/supercollider/tests.scd} \end{mdframed} \caption{supercollider/tests.scd: \gls{sclang} controls an \gls{ssr} client (with previously polling it), creating several sources and moving them} \label{lst:ssr-tests-sclang-sources} \end{listing} The test described in Listing~\ref{lst:ssr-tests-sclang-sources} is a test for functionality, which also serves as a long-running stress test for the \gls{ssr}. It creates 20 sources, that are then moved around randomly, every 100\,ms, for 100\,s, which on a Lenovo W540, with an Intel i7-4700MQ and 16\,GB RAM created less than 50\% of \gls{cpu} load.\\ Based on the above mentioned tests, the basic functionality of the \gls{osc} interface can be guaranteed and depending on the host's hardware also a maximum degree of capacity utilization can be estimated, when observing the \gls{ssr}'s workload towards the system, while using the long-running tests.\\ It has to be mentioned, that a higher load can be observed, when using higher levels of verbosity (especially above \textbf{-vv}). This is explained by the fact, that the \gls{ssr} will print out every \gls{osc} message received and sent above the aforementioned verbosity level. \subsection{Future Work} \label{subsec:future_work} Several features, interesting for different use cases, were out of scope for this work. They are however complementary to the \gls{osc} networking extension, or can be implemented on top of it and will be discussed in the following subsections.\\ However, before any more changes can take place, the \gls{osc} interface first has to be merged into the main source code repository for the \gls{ssr}.
This will also entail an update to the user manual\footnote{\href{https://ssr.readthedocs.io/en/latest/} {https://ssr.readthedocs.io/en/latest/}}, to ensure extensive documentation of the various \gls{osc} messages now understood by the software and updated build instructions, that come with the usage of liblo (see~\ref{subsubsec:liblo}). Especially the latter might prove as the defining time factor, as for seamless integration in the \glspl{os} a stable version of the \gls{osc} specification implementation will always be preferred over a development version. A request for a new stable release has already been directed towards the liblo maintainer\footnote{\href{https://sourceforge.net/p/liblo/bugs/42/} {https://sourceforge.net/p/liblo/bugs/42/}}. \subsubsection{Non-Renderer} \label{subsubsec:non-renderer} The \gls{ssr} features a \gls{gui}, that was in the process of being upgraded for \gls{qt5} at the time of writing. Future versions of the software could be used to also display setups of networking instances, instead of only displaying the ones, that are locally running.\\ The implementation could be desirable for massive multi-channel setups and simply switching between several (local or network-attached) \gls{ssr} instances alike. An additional identifier for the \textbf{-N} flag (see~\ref{para:server-instance}) could be used to start an instance in this mode.\\ \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-client-server-clients-only-shared-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which only the clients render audio collectively (e.g.\ medium or large-scale \gls{wfs}). The server instance is not controlled via \gls{osc}, but controls its clients through it. Additionally, its rendering engine does not have any outputs.]{A diagram displaying an \gls{ssr} client/server setup, in which only the clients render audio collectively (e.g.\ medium or large-scale \gls{wfs}). 
The server instance is not controlled via \gls{osc}, but controls its clients through it. Additionally, its rendering engine does not have any outputs.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-client-server-clients-only-shared-output} \end{figure} The functionalities of the \gls{ssr}'s \gls{gui}, its several spatial audio renderers and \gls{osc} interface (amongst other parts) are determined by its \gls{pubsub}. For the \gls{gui} part of the software to display information about a networked setup, or even switch between several of them, it is therefore not needed or even desirable for that instance to render audio at all. An instance with such features could be imagined as a \gls{gui} only frontend.\\ Figure~\ref{fig:ssr-client-server-clients-only-shared-output} illustrates a scenario, in which a server instance is used to control a set of \textit{n} clients, that collectively renders audio (e.g.\ on a large scale \gls{wfs} or \gls{nfc-hoa} system). In contrast to the client instances, the server does not render any audio (i.e.\ has no outputs) and might not even need any audio input.\\ The server in this example could also be a client, subscribed to a server instance in a cluster similar to the one in Figure~\ref{fig:ssr-external-client-server-shared-output} (i.e.\ as the \gls{osc} capable application controlling the server instance by using a \mintinline{c++}{MessageLevel} of \mintinline{c++}{SERVER} or higher). \begin{figure}[!htb] \centering \includegraphics[scale=1.0, trim = 20mm 204mm 10mm 10mm, clip] {ssr-client-server-clients-only-separate-output.pdf} \caption[A diagram displaying an \gls{ssr} client/server setup, in which only the clients render audio to separate outputs (e.g.\ multiple \glspl{bs} renderers). The server instance is not controlled via \gls{osc}, but controls its clients through it. 
Additionally, its rendering engine does not have any outputs.]{A diagram displaying an \gls{ssr} client/server setup, in which only the clients render audio to separate outputs (e.g.\ multiple \glspl{bs} renderers). The server instance is not controlled via \gls{osc}, but controls its clients through it. Additionally, its rendering engine does not have any outputs.\\ {\color{osc-in}\textbf{--}} \gls{osc} input {\color{osc-out}\textbf{--}} \gls{osc} output {\color{audio-in}\textbf{--}} Audio input {\color{audio-out}\textbf{--}} Audio output } \label{fig:ssr-client-server-clients-only-separate-output} \end{figure} The example shown in Figure~\ref{fig:ssr-client-server-clients-only-separate-output} is similar to the one before, with the difference, that its \textit{n} clients render audio on separate systems.\\ Said client instances could be a cluster of headphone renderers, such as \gls{brs} renderers, or loudspeaker renderers, such as \gls{vbap} or \gls{wfs}, or even a combination of both types.\\ Analogous to the example given before, the \gls{gui} only frontend could also act as a client with a \mintinline{c++}{MessageLevel} of \mintinline{c++}{SERVER} or higher (i.e.\ again being the \gls{osc} capable application), controlling the server in a setup, similar to the one shown in Figure~\ref{fig:ssr-external-client-server-separate-output}.\\ For the aforementioned use cases to work, the \gls{gui} of the \gls{ssr} has to be extended to show the networking specific information and be able to use the \gls{osc} interface through the \gls{pubsub} interface (which itself would probably have to be extended as well). 
Additionally, a renderer should be conceived, that does not render audio, but still uses the \gls{pubsub}, so the internal functionality of the \gls{ssr} can be reused, leading to a relatively light-weight \gls{ssr} \gls{gui} variant, only for controlling setups.\\ As there are many edge cases to networked setups, it still seems unclear, whether audio inputs would actually be needed for such a renderer. \cleardoublepage \subsubsection{Alien Loudspeaker} \label{subsubsec:alien_loudspeaker} For the examples given in Figures~\ref{fig:ssr-client-server-shared-output},~\ref{fig:ssr-client-server-clients-only-shared-output},~\ref{fig:ssr-external-clients-only-shared-output} and~\ref{fig:ssr-external-client-server-shared-output}, which feature a set of \textit{n} clients (server instances are counted as clients for the point made, where applicable), used for rendering in a medium or large scale loudspeaker based setup, an additional type of loudspeaker should be conceived and implemented in the different renderers.\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=44, lastline=51, fontsize=\footnotesize]{c++}{../../ssr/src/loudspeaker.h} \end{mdframed} \caption{src/loudspeaker.h: \mintinline{c++}{enum model_t}} \label{lst:loudspeaker.h} \end{listing}\\ In the setups, where rendering takes place collectively, each client instance currently is reproducing the complete reproduction setup. This means, that for a reproduction setup with several hundred loudspeakers, each client creates exactly that number of \gls{jack} outputs, although it would only be responsible for discretely rendering audio on a subset of them.\\ To cope with this edge case, the current loudspeaker renderers would have to be extended to be able to distinguish between the loudspeakers of the current and those of other instances.
This would enable them to create \gls{jack} outputs in the amount of the loudspeakers they are rendering audio for and reduce the overall processing usage.\\ In Listing~\ref{lst:loudspeaker.h} the \mintinline{c++}{enum model_t}, defining the loudspeaker types available to the \gls{ssr}, is extended to facilitate the new model type \mintinline{c++}{alien}, which could be used internally by the renderers to identify loudspeakers, not to render on.\\ Already when defining a reproduction setup for the \gls{ssr}, host-specific loudspeakers have to be taken into account. Listing~\ref{lst:asdf.xsd} shows an attempt at providing a unique attribute for each part of the setup, that is referencing a loudspeaker --- the hostname or \gls{ip} address of the host --- by extending the \gls{asdf}.\\ However, more work has to be put into implementing this feature, or rather improvement, as it also requires tests in medium and large scale setups, to ensure a discrete rendering, as if only using one host. \subsubsection{Status Messages} \label{subsubsec:status_messages} When reflecting about different use cases for networking setups involving the \gls{ssr}, it became apparent, that in certain situations it would be desirable to be able to poll instances for information, involving sources, scenes and the like.\\ One example is the implementation of a light-weight, single-purpose \gls{gui} (e.g.\ non-interactive display of source positions) in another programming language, such as \gls{python}, \gls{pd}, or \gls{supercollider}, while only relying on \gls{osc} for communication between the parts. Another example is the implementation of monitoring of certain aspects of a client or server instance (e.g.\ \gls{cpu} usage). 
Both examples should allow a \gls{gui} (or any other monitoring) process to be subscribed, after the active \gls{ssr} instances have started rendering.\\ To be able to retrieve information from an \gls{ssr} instance, its \gls{pubsub} interface has to be extended and \mintinline{c++}{get} functions implemented --- where applicable --- to return the desired information. A special case of this feature is described in~\ref{subsubsec:scene_transfer}. \subsubsection{Scene Transfer} \label{subsubsec:scene_transfer} The \gls{ip} interface of the \gls{ssr} implements a functionality to transfer all information related to a scene as an \gls{xml} formatted string. This is useful, if the scene information should be stored on the machine requesting the information, instead of on the rendering machine.\\ Due to shortage of time to implement it and the original functionality heavily relying on the \gls{xml} associated code, the \gls{osc} interface still lacks this feature, which in its context could also be used to transfer all scene information to another client, subscribed to a server.\\ It would prove particularly useful, if clients could for example request the scene currently held by their server instance, in the case where they are started after the server has been started and its scene has already been set up. The server would then send a set of instructions as \gls{osc} messages, needed for setting up the scene in question.\\ For this feature to work reliably, some edge cases have to be considered, such as gaps in the list of source \glspl{id}: Every source gets a unique non-zero, non-negative \gls{id} assigned on creation.
When a source is deleted, its \gls{id} is not assigned to a source anymore and will not be reused, unless the whole scene is deleted and a new cycle of source creation reaches a number that high.\\ This means, if a scene with source \gls{id} gaps has to be transferred, the \gls{osc} messages have to be designed in such a way, that they can account for them, as every source message (apart from \textit{/source/new}) requires a valid source \gls{id} and subsequent calls to the server would otherwise trigger incorrectly mapped operations on its clients' sources.\\ Additionally, it would be useful to be able to transfer a scene to another client, by request, if the caller's \mintinline{c++}{MessageLevel} is \mintinline{c++}{SERVER} or higher.\\ The scene is a piece of redundant information, in a networked setup (whereas the reproduction setup is individual). Being able to transfer scenes, using \gls{osc}, would further improve usability, as the description files only have to be in one place. \subsubsection{Assigning In- and Outputs on the Fly} \label{subsubsec:assigning_in_and_outputs_on_the_fly} The \gls{ssr}, being a \gls{jack} client, is able to add inputs for its sources and outputs for its renderers according to the configuration variables \mintinline{yaml}{INPUT_PREFIX} and \mintinline{yaml}{OUTPUT_PREFIX}, as shown in Listing~\ref{lst:ssr.conf.example}.\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=44, lastline=50, fontsize=\footnotesize, breaklines]{yaml}{../../ssr/data/ssr.conf.example} \end{mdframed} \caption{data/ssr.conf.example: \gls{jack} settings in the \gls{ssr} configuration file} \label{lst:ssr.conf.example} \end{listing}\\ The approach however is somewhat static, as it only allows setting up one predefined client during startup. This can be the system's hardware in- and outputs or other \gls{jack} clients.
The selected input name is added to the prefix to connect an \gls{ssr} source to a \gls{jack} client output with that name. Following the configuration file example, given a source input name of \mintinline{yaml}{1}, the \gls{ssr} would connect to the \gls{jack} client port named \mintinline{yaml}{alsa_pcm:capture_1}.\\ Dynamically reassigning source input or renderer output connections is only possible by using external tools, able to handle connections of a \gls{jack} session, such as QjackCtl \citep{website:qjackctl}, Patchage \citep{website:patchage} or aj-snapshot \citep{website:aj-snapshot}.\\ Every \gls{jack} client is allowed to make connections to other clients on the same server on its own. This general feature should be harnessed in the case of the \gls{ssr} to allow assigning and reassigning of source inputs and renderer outputs on the fly and exposing this functionality to the \gls{osc} interface (i.e.\ by modifying and extending the \gls{pubsub} interface).\\ This would make the application more dynamic and allow easier scripting of scenes and reproduction setups alike. This becomes especially useful in the case of listening experiments, as the experiments will not have to rely on a mixture of different scripting languages anymore (e.g.\ see experimental setup in attachment of \citet{mastersthesis:grigoriev2017}). \subsubsection{Interpolation of Moving Sources} \label{subsubsec:interpolation_of_moving_sources} Using the \gls{ip} or \gls{osc} interface, it is possible to move sources in a scene to a new location. Unlike sWONDER (see~\ref{subsec:swonder}), the \gls{ssr} is not able to interpolate movement. 
When a new location for a source is requested, the movement is carried out instantly, whereas sWONDER is able to move a source (on a straight line) from one position to the next in a given time frame.\\ A series of movements can be requested in any desired time frame, which means, that spatial aliasing is very likely to occur, during sets of requests with a short time span between them.\\ To work around this, the \gls{ssr} should implement a feature comparable to the one sWONDER provides and apply rate limiting on the source positioning requests, depending on a dynamically settable threshold value in milliseconds.\\ Applications, such as WFSCollider (see~\ref{subsec:wfscollider}) or 3Dj (see~\ref{subsec:3dj_supercollider_quark}) rely on their \gls{gui} or rather \gls{sclang} to implement dynamic movements (e.g.\ circular or randomized) \citep[pp.~56--62]{manual:wfscollider}. While the creative process of source path generation is clearly best placed in a visually interactive process, its communication with the rendering engine has to be high-performance and if scalable to large setups, ideally with low network throughput.\\ Therefore it has to be evaluated, if implementing a set of understood geometrical shapes, instead of sending a high frequency of source positioning messages could be a more feasible solution in the case of the \gls{ssr}. In~\ref{subsubsec:dynamic_scene} a wider approach to this problem is discussed. \subsubsection{Dynamic Scene} \label{subsubsec:dynamic_scene} When using the \gls{ssr} in a more artistic context, such as musical scores, or in scientific experimental environments dealing with moving sources, a dynamic scene is required. sWONDER (see~\ref{subsec:swonder}) has a scoreplayer application, that can be synced with \gls{midi}, which is used to record and play back scores (i.e.\ recorded source properties) from unvalidated \gls{xml} files.
WFSCollider has a fully integrated timeline, that can be used to place multiple (even concurrent) events, save and play them \citep[p.~10]{manual:wfscollider}.\\ \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=84, lastline=98, fontsize=\footnotesize, breaklines]{xml}{../../ssr/data/asdf.xsd} \end{mdframed} \caption{data/asdf.xsd: Draft of the score element within the \gls{asdf} schema file} \label{lst:asdf.xsd-score} \end{listing}\\ Unfortunately the \gls{asdf} was never properly extended for these purposes (see Listing~\ref{lst:asdf.xsd-score}), which is why the \gls{ssr} is not able to deal with dynamic scenes in a comparable fashion yet.\\ Unlike sWONDER, the \gls{ssr} uses schema validated \gls{xml} only, whereas 3Dj and WFSCollider use a unique format or even \gls{supercollider} code as score files.\\ Schema validated input is less error-prone and should generally be preferred over single-purpose or self-conceived formats. Therefore, it would be a good step to consolidate the \gls{asdf} schema part responsible for dynamic elements, while keeping in mind the overall message throughput as discussed in~\ref{subsubsec:interpolation_of_moving_sources} and thus enable the \gls{ssr} to deal with dynamic scenes efficiently.\\ Additionally, the \gls{gui} efforts made for WFSCollider could be combined with an \gls{ssr} backend, as the \gls{ssr} lacks a tool for creating and controlling dynamic content. \subsubsection{Network Enabled Head Tracking} \label{subsubsec:network_enabled_headtracking} Due to the higher availability of sensors, microcontrollers and embedded systems in recent years, it has become very affordable to build network enabled head tracking devices in small series. Many of the conceived devices, such as the \gls{gpl} licensed \textit{Hedrot} \citep{website:hedrot}, allow for \gls{osc} communication.\\ Using the \gls{osc} interface, such a head tracker can be added as a client to an \gls{ssr} instance.
This will probably require rate-limiting the sensor output, but would enable a networked setup, that could prove to be cheaper, more reliable and flexible, than the compile-time opt-ins (i.e.\ VRPN, Polhemus Fastrak/ Patriot, InterSense InertiaCube3).\\ In the specific case of setting up a large array of independent \gls{brs} or \gls{bs} renderers, connected to one server instance or application, it might be required to extend the messaging system to allow passing on of messages from one client to a server only towards one specific other client (a type of proxy messaging). This would ensure, that every renderer can be supplied with a specific stream of \gls{osc} messages from its assigned head tracker. Additionally, single (and local) renderers can be started as a server instance and clients can be assigned to them flexibly. \cleardoublepage \bibliographystyle{../help/FG_AK_English_AuthorYear.bst} \bibliography{../bib/ssr-networking} \cleardoublepage \newpage \cleardoublepage \begin{appendices} \section{PDF Version} \label{appendix:pdf_version} The PDF version of this work can be found on the~\nameref{digital_ressource} as the file \mintinline{shell}{master-thesis/thesis/thesis.pdf}. \section{LaTeX Sources} \label{appendix:latex_sources} The {\LaTeX} sources for this work can be found on the \nameref{digital_ressource} in the file \mintinline{shell}{master-thesis/thesis/thesis.tex}. The accompanying BibTeX file is located in \mintinline{shell}{master-thesis/bib}.\\ All graphics used in this work can be found in \mintinline{shell}{master-thesis/images}. \section{Thesis Bibliography} \label{appendix:thesis_bibliography} The references used in this work, if not in the form of a website, can be found on the \nameref{digital_ressource} in the folder \mintinline{shell}{master-thesis/src}. 
\section{OSC Interface Source Code} \label{appendix:osc_interface_source_code} All C++ source code written for the \gls{osc} interface can be found on the~\nameref{digital_ressource} in the folder \mintinline{shell}{ssr/src/networking}. However, there are more parts of the original \gls{ssr} source code, that have been extended and modified, such as \mintinline{shell}{ssr/src/controller.h} or \mintinline{shell}{ssr/src/configuration.cpp}.\\ It is possible to get a better overview of the various changes, by using git's log features, as shown in Listing~\ref{lst:git-log}.\\ \begin{listing}[!htb] \begin{mdframed} \begin{minted}[fontsize=\footnotesize]{shell} cd ssr git log \end{minted} \end{mdframed} \caption{The git log feature used in the ssr folder of the~\nameref{digital_ressource}.} \label{lst:git-log} \end{listing}\\ To evaluate the differences between the original code base and the modified version, it is recommended to use GitHub's diff functionality for the dedicated branches: \begin{itemize} \item configuration-client-server\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:configuration-client-server}{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:configuration-client-server}} \item networking-with-osc\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:networking-with-osc} {https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:networking-with-osc}} \item osc-tests\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:osc-tests}{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:osc-tests}} \item reproduction-with-hostnames\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:reproduction-with-hostnames}{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:reproduction-with-hostnames}} \item 
sclang-workflows\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:sclang-workflows}{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:sclang-workflows}} \item alien-loudspeaker\footnote{\href{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:alien-loudspeaker}{https://github.com/SoundScapeRenderer/ssr/compare/master...dvzrv:alien-loudspeaker}} \end{itemize} The source code developed in the aforementioned branches was merged into a new, local branch called \textit{testing} for the \nameref{digital_ressource}. However, all of them are available separately in this local source code repository.\\ Therefore, git can also be used locally to check out a specific branch of the source code, as shown in Listing~\ref{lst:git-branch-checkout}.\\ \begin{listing}[!htb] \begin{mdframed} \begin{minted}[fontsize=\footnotesize]{shell} cd ssr git checkout networking-with-osc \end{minted} \end{mdframed} \caption{The git checkout feature used in the ssr folder of the~\nameref{digital_ressource} to check out the \textit{networking-with-osc} branch.} \label{lst:git-branch-checkout} \end{listing}\\ Comparison between branches can also be done locally, as described in Listing~\ref{lst:git-diff-branch}.\\ \begin{listing}[!htb] \begin{mdframed} \begin{minted}[fontsize=\footnotesize]{shell} cd ssr git diff master...networking-with-osc \end{minted} \end{mdframed} \caption{The git diff feature used in the ssr folder of the~\nameref{digital_ressource} to display the difference between the \textit{networking-with-osc} and the \textit{master} branch.} \label{lst:git-diff-branch} \end{listing}\\ The examples in Listings~\ref{lst:git-branch-checkout} and~\ref{lst:git-diff-branch} can be applied analogously to the other branches.
\section{SuperCollider Scripts} \label{appendix:supercollider_scripts} The \gls{supercollider} code written for the tests (see~\ref{subsec:automated_tests}) and workflows (see~\ref{subsubsec:workflow_examples}) is located on the \nameref{digital_ressource} in the folder \mintinline{shell}{ssr/supercollider}.\\ For using the scripts, \gls{supercollider} version 3.7 or above is recommended. \section{Reproduction Setup Changes} \label{appendix:reproduction_setup_changes} \begin{listing}[!htb] \begin{mdframed} \inputminted[numbers=left, firstline=44, lastline=62, fontsize=\footnotesize, breaklines]{xml}{../../ssr/data/asdf.xsd} \inputminted[numbers=left, firstline=250, lastline=264, fontsize=\footnotesize]{xml}{../../ssr/data/asdf.xsd} \inputminted[numbers=left, firstline=314, lastline=318, fontsize=\footnotesize, breaklines]{xml}{../../ssr/data/asdf.xsd} \inputminted[numbers=left, firstline=361, lastline=364, fontsize=\footnotesize, breaklines]{xml}{../../ssr/data/asdf.xsd} \inputminted[numbers=left, firstline=186, lastline=186, fontsize=\footnotesize]{xml}{../../ssr/data/asdf.xsd} \end{mdframed} \caption{data/asdf.xsd: Reproduction setup, loudspeaker, circular array and linear array definition in \gls{asdf}, extended by a \textit{hostname} attribute} \label{lst:asdf.xsd} \end{listing} \end{appendices} \cleardoublepage \pagenumbering{Roman} \setcounter{page}{1} \pagestyle{plain} \printglossaries \newpage \listoffigures \newpage \listoflistings \newpage \listoftables \newpage \phantomsection \addcontentsline{toc}{section}{Digital Resource} \section*{Digital Resource} \label{digital_ressource} \vspace{23em} \centering \scriptsize This page holds a data disk. \newpage \end{document}