% Paper by Helmert

\documentclass{article}

\usepackage{amsmath}

\newcommand{\half}{\mbox{$\frac{1}{2}$}}
\newcommand{\third}{\mbox{$\frac{1}{3}$}}
\newcommand{\quarter}{\mbox{$\frac{1}{4}$}}

\begin{document}

\begin{flushleft}
  {\Large\textbf{The Calculation of the Probable Error from the Squares 
  of the Adjusted Direct Observations of Equal Precision and Fechner's
  Formula}} 
\end{flushleft}

\begin{flushleft}
  {\large\textbf{F R Helmert}}
\end{flushleft}

Let $\lambda$ denote the deviations of the observations from their
arithmetic mean, let $\sigma$ denote the mean error, and $\rho$ the
probable error.  Then the optimal estimate of $\rho$ is well known to be
given by the following formulae,
\begin{align}
  \rho &= 0.67449\dots\sigma \notag \\
  \sigma &= \sqrt{\frac{[\lambda\lambda]}{n-1}}
                \left[1\pm\sqrt{\frac{1}{2(n-1)}}\,\right]
\end{align}
where the square root in the bracket is the mean error in the estimate of
$\hat\sigma$, expressed as a fraction of $\hat\sigma$.  It is our
intention to provide a somewhat more rigorous derivation of this formula
under the Gaussian law of error than given elsewhere, even where the
principles of probability theory are used.

If $\epsilon$ denotes a true error of an observation, then the future
probability of a set $\epsilon_1$, \dots, $\epsilon_n$ is
\stepcounter{equation}
\begin{equation}
  \label{futureprob}
  \left[\frac{h}{\sqrt{\pi}}\right]^n
  e^{-h^2[\epsilon\epsilon]}d\epsilon_1\dots d\epsilon_n.
\end{equation}
For given $\epsilon_1$, \dots, $\epsilon_n$, by setting the probability
of a hypothesis $h$ proportional to this expression, one obtains an
optimal value of $\sigma^2$
\setcounter{equation}{0}
\renewcommand{\theequation}{\Alph{equation}}
\begin{equation}
  \label{optimalsigma}
  \frac{1}{2h^2}=\hat\sigma^2=\frac{[\epsilon\epsilon]}{n}.
\end{equation}
However, since the $\epsilon$ are unknown, we are forced to estimate
$[\epsilon\epsilon]$ and this may be regarded as a weakness of previous
derivations.  This deficiency may be removed by the consideration that a
set $\lambda_1$, \dots, $\lambda_n$ may arise from true errors in an
infinity of ways.  But since only the $\lambda$ are given, we must
calculate the future probability of a set $\lambda_1$, \dots,
$\lambda_n$ and take this expression as proportional to the probability
of the hypothesis about $h$.

\section{Probability of a Set $\lambda_1$, \dots, $\lambda_n$ of
Deviations from the Arithmetic Mean}

In expression (\ref{futureprob}) we introduce the variables $\lambda_1$,
\dots, $\lambda_{n-1}$ and $\bar\epsilon$ in place of the $\epsilon$ by
the equations:
\begin{gather*}
  \epsilon_1=\lambda_1+\bar\epsilon,\qquad
  \epsilon_2=\lambda_2+\bar\epsilon, \dots \\
  \epsilon_{n-1}=\lambda_{n-1}+\bar\epsilon, \qquad
  \epsilon_n=-\lambda_1-\lambda_2-\dots-\lambda_{n-1}+\bar\epsilon
\end{gather*}

This transformation is in accord with the known relations between the
errors $\epsilon$ and deviations $\lambda$, since the addition of the
equations gives $n\bar\epsilon=[\epsilon]$; at the same time the
condition $[\lambda]=0$ is satisfied.  The determinant of the
transformation, a determinant of the $n$th degree, is
\[ \left|\begin{array}{cccccc}
   1     & \cdot & \cdot &  & \cdot & 1 \\
   \cdot & 1     & \cdot &  & \cdot & 1 \\
   \cdot & \cdot & 1     &  & \cdot & 1 \\
   \ \\
   \cdot & \cdot & \cdot &  & 1     & 1 \\
   -1    & -1    & -1    &  & -1    & 1
  \end{array}\right| = n.
\]

Consequently expression (\ref{futureprob}) becomes
\begin{equation}
  n\left[\frac{h}{\sqrt{\pi}}\right]^n
  e^{-h^2[\lambda\lambda]+h^2n\bar\epsilon^2}
  d\lambda_1 d\lambda_2\dots d\lambda_{n-1} d\bar\epsilon
\end{equation}
where $[\lambda\lambda]=\lambda_1^2+\lambda_2^2+\dots+\lambda_n^2$;
$\lambda_n=-\lambda_1-\lambda_2-\dots-\lambda_{n-1}$.  If we now
integrate over all possible values of $\bar\epsilon$, we obtain for the
probability of the set $\lambda_1\dots\lambda_n$ the expression
\renewcommand{\theequation}{\arabic{equation}}
\begin{equation}
  \label{problambdas}
  \sqrt{n}\left[\frac{h}{\sqrt{\pi}}\right]^{n-1}
  e^{-h^2[\lambda\lambda]}
  d\lambda_1 d\lambda_2\dots d\lambda_{n-1}.
\end{equation}
This may be verified by integration over all possible values of
$\lambda_1\dots\lambda_{n-1}$, which yields unity, as required.

\section{Optimal Hypothesis on $h$ for Given\\Deviations $\lambda$}

For given values of the $\lambda$'s we set the probability of a
hypothesis on $h$ proportional to expression (\ref{problambdas}).  A
standard argument then yields the optimal estimate of $h$ as the value
maximizing (\ref{problambdas}).  Differentiation shows that this occurs
when
\[ \frac{1}{2h^2}=\frac{[\lambda\lambda]}{n-1}, 
\]
which establishes the first part of formula 
\renewcommand{\thefootnote}{\fnsymbol{footnote}}
(1)\footnote{In the same way it is possible by strict use of probability
theory to derive a formula for $\sigma^2$ when $n$ observations depend
on $m$ unknowns, a result which the author has established to his
satisfaction and will communicate elsewhere.}.

\section{Probability of a Sum $[\lambda\lambda]$ of Squares of the
Deviations $\lambda$}

The probability that $[\lambda\lambda]$ lies between $u$ and $u+du$ is
from (\ref{problambdas})
\begin{equation}
  \sqrt{n}\left[\frac{h}{\sqrt{\pi}}\right]^{n-1}
  \int d\lambda_1 \dots \int d\lambda_{n-1}
  e^{-h^2[\lambda\lambda]},
\end{equation}
integrated over all $\lambda_1\dots\lambda_{n-1}$ satisfying
\[ u \leq [\lambda\lambda] \leq u+du. \]
We now introduce $n-1$ new variables $t$ by means of the equations
\begin{align*}
  t_1     &= \sqrt{2}(\lambda_1+\half\lambda_2+\half\lambda_3
                 +\half\lambda_4+\dots+\half\lambda_{n-1})\\
  t_2     &= \qquad\
             \sqrt{\frac{3}{2}}(\lambda_2+\third\lambda_3+\third\lambda_4
                   +\dots+\third\lambda_{n-1})\\
  t_3     &= \qquad\qquad\quad\
             \sqrt{\frac{4}{3}}(\lambda_3+\quarter\lambda_4
                   +\dots+\quarter\lambda_{n-1})\\  
  .\quad  &= \qquad . \qquad\qquad . \qquad\qquad . \\
  t_{n-1} &= \qquad\qquad\qquad\qquad\qquad\qquad
             \sqrt{\frac{n}{n-1}}\lambda_{n-1}
\end{align*}
With the determinant $\sqrt{n}$ of the transformation, the above
expression becomes
\[ \sqrt{n}\left[\frac{h}{\sqrt{\pi}}\right]^{n-1}
   \int dt_1 \dots \int dt_{n-1}
   e^{-h^2[tt]},
\]
the limits of integration being determined by the condition
\[ u \leq [tt] \leq u+du. \]

We now recognize that the probability for the sum of squares of the $n$
deviations $\lambda$, $[\lambda\lambda]=u$, is precisely the same as the
probability that the sum of squares $[tt]$ of $n-1$ true errors equals
$u$.  This last probability I gave in Schl\"omilch's journal, 1875,
p.\,303, according to which
\begin{equation}
  \label{probss}
  \frac{h^{n-1}}{\Gamma(\frac{n-1}{2})}
  u^{\frac{n-3}{2}}e^{-h^2u}du,
\end{equation}
is the probability that the sum of squares $[\lambda\lambda]$ of the
deviations $\lambda$ of $n$ equally precise observations from their mean
lies between $u$ and $u+du$.  Integration of (\ref{probss}) from $u=0$
to $\infty$ gives unity.

\section{The Mean Error of the Formula\\
$\hat\sigma=\sqrt{[\lambda\lambda]:(n-1)}$} 

Since it is difficult to obtain a generally valid formula for the
probable error of this formula, we confine ourselves to the mean error.

The mean error of the formula $\hat\sigma^2=
\frac{[\lambda\lambda]}{n-1}$ is known exactly, namely 
$\sigma^2\sqrt{2:(n-1)}$.  We have therefore
\[ \hat\sigma^2 = \frac{[\lambda\lambda]}{n-1}
                  \left[1\pm\sqrt{\frac{2}{n-1}}\right]
\]
and if $n$ is large it follows by a familiar argument that
\[ \hat\sigma = \sqrt{\frac{[\lambda\lambda]}{n-1}}
                \left[1\pm\frac{1}{2}\sqrt{\frac{2}{n-1}}\right]. 
\]

Formula (1) results.  However, if $n$ is small, for example equal to 2,
this argument lacks all validity.  For then $\sqrt{2:(n-1)}$ is no
longer small compared to 1, in fact even larger than 1 for $n=2$.  We
now proceed as follows.

The mean squared error of the formula
\[ \hat\sigma = \sqrt{[\lambda\lambda]:(n-1)} \]
is the mean value of
\[ \left[\sqrt{\frac{[\lambda\lambda]}{n-1}}-\sigma\right]^2. \]
If one develops the square and recalls that $[\lambda\lambda]:(n-1)$ has
mean $\sigma^2$ or $1:2h^2$, it follows that the mean of the above is 
\[ \frac{1}{h^2}-
   \frac{\sqrt{2}}{h}\left[\sqrt{\frac{[\lambda\lambda]}{n-1}}\right], \]
where the term in large brackets must be replaced by its mean value.

Consideration of formula (\ref{probss}) yields for the mean value of
$\sqrt{[\lambda\lambda]}$ the expression
\[ \frac{h^{n-1}}{\Gamma(\frac{n-1}{2})}
   \int_0^{\infty} u^{\frac{n-2}{2}}e^{-h^2u}du,
   \text{i.e.},
   \frac{1}{h}\,\frac{\Gamma(\frac{n}{2})}{\Gamma(\frac{n-1}{2})},
\]
so that the mean squared error of $\hat\sigma$ is
\[ \frac{1}{h^2}\left[1-
   \frac{\Gamma(\frac{n}{2})}{\Gamma(\frac{n-1}{2})}
   \sqrt{\frac{2}{n-1}}\right].
\]
We must therefore regard the following formula as more accurate than
(1):
\begin{align}
  \hat\sigma &= \sqrt{\frac{[\lambda\lambda]}{n-1}}
                \left[1\pm\sqrt{\left\{2- 
                \frac{\Gamma(\frac{n}{2})}{\Gamma(\frac{n-1}{2})}
                \sqrt{\frac{8}{n-1}}\right\}}\right] \notag \\
  \hat\rho   &= 0.67449\dots\hat\sigma,
\end{align}
where the square root following $\pm$ signifies the mean error of the
formula for $\hat\sigma$.

\bigskip\bigskip

\begin{flushleft}
\textit{Originally published as:}
Die Genauigkeit der Formel von Peters zur Berechnung des
wahrscheinlichen Fehlers directer Beobachtungen gleicher Genauigkeit,
\textit{Astron.\ Nachr.}\ \textbf{88} (1876), 113--132.
The title translated above is the title of the section concerned rather
than of the article.
\end{flushleft}

\end{document}

%