\documentclass[12pt, notitlepage]{article}

\usepackage{amsmath,amssymb,amsthm}
\usepackage{mathrsfs}
\usepackage{hyperref}
\usepackage[margin=1in]{geometry}
\usepackage{tikz-cd}
\usepackage[all,cmtip]{xy}

\newtheoremstyle{plainsl}% <name>
  {8pt plus 2pt minus 4pt}% <Space above>
  {8pt plus 2pt minus 4pt}% <Space below>
  {\slshape}% <Body font>
  {0pt}% <Indent amount>
  {\bfseries}% <Theorem head font>
  {.}% <Punctuation after theorem head>
  {5pt plus 1pt minus 1pt}% <Space after theorem head>
  {}% <Theorem head spec (can be left empty, meaning `normal')>

\theoremstyle{plainsl}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem*{lemma*}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem*{corollary*}{Corollary}

\newenvironment{fineprint}{\medskip \begin{small}}{\end{small} \medskip}

\title{\textbf{Baer Invariants and the Birkhoff--Witt Theorem}}

\author{P. J. Higgins\\[4pt]
\textit{Department of Mathematics, King's College, Strand, London W.C.~2}}
\date{Received July 4, 1968\\[4pt]
Published in: \textit{Journal of Algebra} \textbf{11} (1969), pp.~469--482.}

\begin{document}

\maketitle

\medskip

\noindent
Digitized with Claude Opus 4.7 based on \href{https://doi.org/10.1016/0021-8693(69)90086-6}{a scan} from \href{https://www.elsevier.com/about/policies-and-standards/open-access-licenses/elsevier-user}{Elsevier Open Archive} \\(original scan: \href{https://doi.org/10.1016/0021-8693(69)90086-6}{doi:10.1016/0021-8693(69)90086-6}). \\
This is an unofficial re-edition of an article that appeared in an Elsevier publication. Elsevier has not endorsed this re-edition.\\
Proofread by Darij Grinberg 21 May 2026.\\
Appendices added by Darij Grinberg 22 May 2026.

\bigskip

\section*{1. Introduction}

The Birkhoff--Witt theorem asserts that, under certain conditions on a Lie algebra $M$ (over a commutative ring $R$), the graded algebra associated with the enveloping algebra of $M$ is canonically isomorphic with the symmetric algebra of the underlying module of $M$. It then follows that $M$ is embedded in its enveloping algebra. It is known that not all Lie algebras are so embedded, but the theorem has been proved in the following cases:

\begin{itemize}
\item[(i)] when $M$ is a free $R$-module, for any $R$ (Birkhoff \cite{birkhoff}, Witt \cite{witt});
\item[(ii)] when $R$ is a Dedekind domain, for any $M$ (Lazard \cite{lazard}, Cartier \cite{cartier});
\item[(iii)] when $R$ is an algebra over the rationals, for any $M$ (Cohn \cite{cohn}).
\end{itemize}

These conditions refer only to the module structure of $M$, and we aim to give a unified treatment in which the Lie product on $M$ and the enveloping algebra of $M$ are eliminated from the discussion at an early stage. They are replaced by a homological invariant $B(M)$, defined for an arbitrary module $M$, which depends only on the tensor algebra of $M$ and is one of the Baer invariants defined for algebras by Fr\"ohlich \cite{frohlich}. The vanishing of $B(M)$ implies the Birkhoff--Witt theorem for all Lie algebras with underlying module $M$, and we shall show that $B(M) = 0$ in the three cases mentioned above. In fact the condition $B(M) = 0$ is necessary and sufficient for the validity of a closely related embedding theorem for ``Lie structures over $M$'' in ``associative structures over $M$.'' The definition of these concepts is our starting-point.

\section*{2. Lie Structures and Associative Structures}

Let $R$ be a commutative ring (with identity) and let $M$ be an $R$-module. We denote by $T(M) = \bigoplus_{n \geq 0} (\otimes^n M)$ the tensor algebra of $M$ and identify $M$ with the homogeneous part of $T(M)$ of degree $1$. (All tensor products are taken over $R$ unless otherwise stated). The canonical map from $T(M)$ to the symmetric algebra $S(M)$ has as kernel the ideal of $T(M)$ generated by all commutators $xy - yx$ ($x, y \in M$). We denote this kernel by $K(M)$; it is a homogeneous ideal of $T(M)$.

If we consider $K(M)$ as a $T(M)$-bimodule, it is generated by the commutators $xy - yx$, and these satisfy some obvious relations which we use as axioms in the following definition. A \textit{Lie structure over the $R$-module $M$} is a $T(M)$-bimodule $A$ together with a bilinear function $M \otimes M \to A$ (denoted by $x \otimes y \mapsto \langle x, y \rangle$), satisfying the axioms:
\begin{itemize}
\item[(L1)] $\langle x, x \rangle = 0$ \quad ($x \in M$);
\item[(L2)] $\langle x, y \rangle t(uv - vu) = (xy - yx) t \langle u, v \rangle$ \quad ($x, y, u, v \in M$, $t \in T(M)$);
\item[(L3)] $(\langle x, y \rangle z - z \langle x, y \rangle) + (\langle y, z \rangle x - x \langle y, z \rangle) + (\langle z, x \rangle y - y \langle z, x \rangle) = 0$ \quad ($x, y, z \in M$).
\end{itemize}
It is easy to check that $K(M)$ is a Lie structure with $\langle x, y \rangle = xy - yx$. So is $T(M)$ with the same definition of $\langle x, y \rangle$, and other examples will occur below.

As with Lie algebras, one can easily obtain Lie structures from similar associative structures. We define an \textit{associative structure over $M$} to be a $T(M)$-bimodule $B$ together with a bilinear function $M \otimes M \to B$ (denoted by $x \otimes y \mapsto (x, y)$) satisfying the associative law:
\begin{itemize}
\item[(A)] $(x, y) z = x (y, z)$ \quad ($x, y, z \in M$).
\end{itemize}
Then $B$ becomes a Lie structure over $M$ if we define $\langle x, y \rangle = (x, y) - (y, x)$. Axioms (L1) and (L3) are obviously satisfied. Axiom (L2) need only be checked when $t = z_1 z_2 \cdots z_n$ ($z_i \in M$), and in this case axiom (A) implies $(x, y) z_1 z_2 \cdots z_n uv = xy z_1 z_2 \cdots z_n (u, v)$, from which (L2) follows easily.

For any given module $M$ there is a universal Lie structure $L(M)$ over $M$ which can be described as follows. $L(M)$ is generated as $T(M)$-bimodule by symbols $\langle x, y \rangle$, one for each pair of elements $x, y$ of $M$, and has defining relations (L1), (L2), (L3) together with the relations that assert the bilinearity of the function $\langle, \rangle$. This $L(M)$ is characterised up to isomorphism by the universal property: if $(A, \langle\!\langle, \rangle\!\rangle)$ is any Lie structure over $M$ then there is a unique morphism $L(M) \to A$ of $T(M)$-bimodules such that $\langle x, y \rangle \mapsto \langle\!\langle x, y \rangle\!\rangle$ for all $x, y \in M$.

There is also, of course, a universal associative structure over $M$, but we do not need a special notation for it since it can easily be identified. In $T(M)$, the ideal $M^2 T(M) = \bigoplus_{n \geq 2} (\otimes^n M)$ is an associative structure over $M$ with $(x, y) = xy$, and we have

\begin{theorem}
\quad $M^2 T(M)$ is the universal associative structure over $M$.
\end{theorem}

\begin{proof}
Let $(B, (,))$ be any associative structure over $M$. For $j \geq 2$ we can map $\otimes^j M$ to $B$ by the rule $x_1 \otimes x_2 \otimes \cdots \otimes x_j \mapsto (x_1, x_2) x_3 \cdots x_j$ since the latter is an $R$-multilinear function. This gives a map $\theta : M^2 T(M) \to B$ which is clearly a homomorphism of right $T(M)$-modules and maps $xy$ to $(x, y)$. To show that it is also a left $T(M)$-homomorphism it is enough to show that $y_1 y_2 \cdots y_n (x_1, x_2) x_3 \cdots x_j = (y_1, y_2) y_3 \cdots y_n x_1 \cdots x_j$, ($y_r, x_s \in M$), and this is a consequence of the associative law for $B$. The map $\theta$ is unique because the elements $xy$ ($x, y \in M$) generate $M^2 T(M)$ as $T(M)$-bimodule.
\end{proof}

It is unfortunately not always true that $K(M)$ is the universal Lie structure over $M$. Indeed, it is precisely when $K(M) \cong L(M)$ that we can prove the Birkhoff--Witt theorem for Lie algebras on the module $M$. The proof of this theorem given in the next section is essentially that of Lazard \cite{lazard}, with some simplifications made possible by the axiomatic approach.

\section*{3. The Birkhoff--Witt Theorem}

Suppose that we are given a multiplication $M \otimes M \to M$ ($x \otimes y \mapsto [x, y]$) which makes $M$ a Lie algebra over $R$. The elements $\langle x, y \rangle = xy - yx - [x, y]$ of $T = T(M)$ ($x, y \in M$) generate a (non-homogeneous) ideal $J$, and the quotient algebra $E = T/J$ is the \textit{enveloping algebra} of the Lie algebra $M$. The filtration $T_0 \subset T_1 \subset T_2 \subset \cdots$ of $T$ given by $T_n = \bigoplus_{i \leq n} (\otimes^i M)$ induces filtrations of $K = K(M)$, $S = S(M)$, $J$ and $E$ as follows: $K_n = K \cap T_n$, $S_n = (T_n + K)/K$, $J_n = J \cap T_n$, $E_n = (T_n + J)/J$. $E$ is then a filtered algebra and we denote by $G = \bigoplus_{n \geq 0} (E_n / E_{n-1})$ the associated graded algebra\footnote{\textit{Comment by DG:} The direct sum ranged over $n \geq 1$ in the original.}. The homogeneous components of $G$ are
\[
E_n / E_{n-1} \cong (T_n + J)/(T_{n-1} + J) \cong T_n / (T_n \cap (T_{n-1} + J)) \cong T_n / (T_{n-1} + J_n).
\]
Now $S$ is itself graded with homogeneous components
\[
S_n / S_{n-1} \cong T_n / (T_{n-1} + K_n),
\]
and it is clear from the definition of $J$ that $T_{n-1} + J_n \supset T_{n-1} + K_n$. We therefore have canonical surjections $\sigma_n : S_n / S_{n-1} \to E_n / E_{n-1}$ with kernels $(T_{n-1} + J_n)/(T_{n-1} + K_n)$. These give the canonical surjection $\sigma : S \to G$ which is in fact the algebra homomorphism induced by the canonical map from $M$ to the (commutative) algebra $G$. If $\sigma$ is an isomorphism we say that the Lie algebra $M$ has the \textit{Birkhoff--Witt property}, and this is clearly equivalent to the condition
\begin{equation}
J_n \subset T_{n-1} + K_n \qquad (n \geq 1). \label{1}\tag{1}
\end{equation}

\begin{theorem}
If $(K(M), xy - yx)$ is the universal Lie structure over $M$ then every Lie algebra on $M$ has the Birkhoff--Witt property.
\end{theorem}

\begin{proof}
Following Lazard, we introduce modules $J_{(n)}$ ($n \geq 1$) which consist of those elements of $J$ that are most obviously in $T_n$, namely
\[
J_{(n)} = \sum \left( T_r \langle x, y \rangle T_s \mid x, y \in M, \, r + s = n - 2 \right).
\]
Then $J_{(1)} = 0$, $J_{(2)}$ is the $R$-module spanned by all $\langle x, y \rangle = xy - yx - [x, y]$, and $J_{(1)} \subset J_{(2)} \subset \cdots \subset J_{(n)} \subset \cdots \subset J$. Also $J_{(n)} \subset J_n$, and $\bigcup_n J_{(n)} = J$. We write $A_n = J_{(n)}/J_{(n-1)}$ ($n \geq 2$), and form the graded $R$-module $A = \bigoplus_{n \geq 2} A_n$ associated with this new filtration of $J$. Since $J_{(n)} M + M J_{(n)} \subset J_{(n+1)}$, $A$ has the structure of a $T$-bimodule with $A_n M + M A_n \subset A_{n+1}$. Also, $\langle x, y \rangle \in J_{(2)} = A_2$ for all $x, y \in M$.

\begin{lemma*}
$(A, \langle, \rangle)$ is a Lie structure over $M$.
\end{lemma*}

\begin{proof}
(L1): this is trivially true since $\langle x, x \rangle = -[x, x] = 0$ in $M$.

(L2): let $w = \langle x, y \rangle t(uv - vu) - (xy - yx) t \langle u, v \rangle$, where $x, y, u, v \in M$, and where $t \in T$ is homogeneous of degree $n$. Then, calculating in $A$, we have $w \in A_{n+4}$ since $\langle x, y \rangle \in A_2$ and $\langle u, v \rangle \in A_2$. But if we calculate in $J_{(n+4)}$ we find that
\[
w = \langle x, y \rangle t \{ \langle u, v \rangle + [u, v] \} - \{ \langle x, y \rangle + [x, y] \} t \langle u, v \rangle = \langle x, y \rangle t [u, v] - [x, y] t \langle u, v \rangle,
\]
and this lies in $J_{(n+3)}$ since $[u, v]$ and $[x, y]$ lie in $M$. Hence $w = 0$ in $A$, and (L2) follows for all $t$ by linearity.

(L3): let $x, y, z \in M$, and put
\[
u = \{ \langle x, y \rangle z - z \langle x, y \rangle \} + \{ \langle y, z \rangle x - x \langle y, z \rangle \} + \{ \langle z, x \rangle y - y \langle z, x \rangle \}.
\]
Then, calculating in $A$, we have $u \in A_3$. On the other hand, calculating in $J_{(3)}$, if we replace $\langle x, y \rangle$ by $xy - yx - [x, y]$, we obtain
\[
u = \{ [x, y] z - z [x, y] \} + \{ [y, z] x - x [y, z] \} + \{ [z, x] y - y [z, x] \},
\]
the other terms cancelling. Now $[x, y] \in M$, so $\langle [x, y], z \rangle \in J_{(2)}$, that is,
\[
[x, y] z - z [x, y] \equiv [[x, y], z] \pmod{J_{(2)}}.
\]
Permuting $x, y, z$ cyclically and adding, we therefore have $u \in J_{(2)}$ by the Jacobi law in $M$, and this means that $u = 0$ in $A$. The lemma is now proved.
\end{proof}

To prove the theorem we use the hypothesis that $K(M)$ is the \emph{universal} Lie structure over $M$ to obtain a morphism $\theta : K \to A$ of $T$-bimodules sending $xy - yx$ to $\langle x, y \rangle$ for all $x, y \in M$. We claim that $\theta$ is an isomorphism. For let $\delta_n$ denote the $R$-linear map which sends any element of $T$ to its homogeneous part of degree $n$. It is clear from the definition of $J_{(n)}$ that $\delta_n$ maps $J_{(n)}$ into $K_n$ and therefore induces a map
\[
\delta_n^* : A_n = J_{(n)}/J_{(n-1)} \to K_n / K_{n-1}.
\]
The maps $\delta_n^*$ combine to give a map $\delta : A \to \bigoplus (K_n / K_{n-1}) = K$ which sends $\langle x, y \rangle$ to $xy - yx$ ($x, y \in M$), and it is easy to check that $\delta$ is a morphism of $T$-bimodules. Since $A$ and $K$ are generated as $T$-bimodules by all $\langle x, y \rangle$ and all $xy - yx$, respectively, we see that $\theta$ and $\delta$ are inverse isomorphisms. In particular, $\delta_n^* : J_{(n)}/J_{(n-1)} \to K_n / K_{n-1}$ is an injection and it follows that every element of $J_{(n)}$ not in $J_{(n-1)}$ has leading term of degree exactly $n$. Since $\bigcup_n J_{(n)} = J$, this implies that $J_{(n)} = J \cap T_n = J_n$ for all $n$\ \ \ \ \footnote{\textit{Comment by DG:} Here are the details of this argument: We must show that $J_{(n)} = J_n$. Since $J_{(n)} \subset J_n$ is known, it suffices to show that $J_n \subset J_{(n)}$. So let $t \in J_n$. We may assume that $t \neq 0$, since $0 \in J_{(n)}$ holds trivially. Then, $t \in J$, so that $t \in J_{(m)}$ for some $m \geq 2$ (recall that $J_{(1)} = 0$ and $t \neq 0$). Consider the smallest such $m$. Then, $t \in J_{(m)} \setminus J_{(m-1)}$, so that the residue class of $t$ in $J_{(m)}/J_{(m-1)}$ is nonzero. Since $\delta_m^* : J_{(m)}/J_{(m-1)} \to K_m / K_{m-1}$ is an injection (as we showed above), we conclude that $\delta_m(t)$ is nonzero as well. In other words, $t$ has a nonzero homogeneous part of degree $m$. Since $t \in J_n \subset T_n$, this entails $m \leq n$, so that $t \in J_{(m)} \subset J_{(n)}$. Having proved this for each $t \in J_n$, we thus obtain $J_n \subset J_{(n)}$, just as desired.}. But $J_{(n)} \subset K_n + T_{n-1}$, so we have established the condition \eqref{1} which is equivalent to the Birkhoff--Witt property.
\end{proof}

\section*{4. Lie Structures over Free Modules}

Before investigating Lie structures over arbitrary modules $M$ we need to know the situation for the special case when $M$ is free. Our next theorem, combined with Theorem 2, gives a new proof of the Birkhoff--Witt theorem in this case.

\begin{theorem}
Let $M$ be a free $R$-module. Then every Lie structure $(A, \langle, \rangle)$ over $M$ can be embedded in an associative structure $(B, (,))$ over $M$ so that $\langle x, y \rangle = (x, y) - (y, x)$.
\end{theorem}

\begin{proof}
Let $A$ be a given Lie structure and put $B = A \oplus S(M)$. We shall show how to make $B$ an associative structure with the required property. Let $X$ be a basis for $M$ over $R$ and take a fixed total ordering $\leq$ of $X$. If $x_i \in X$ we denote by $\xi_i$ its image in $S(M) = S$. Then $S$ has a basis consisting of all products $\xi_1 \xi_2 \cdots \xi_n$ ($n \geq 0$), where the $x_i$ are in $X$ and $x_1 \leq x_2 \leq \cdots \leq x_n$.

We make $B$ a $T$-bimodule as follows. The action of $T$ on $A$ is to be the given action. To define the action of $T$ on $S$ we need only define maps $M \otimes S \to B$ ($m \otimes \sigma \mapsto m\sigma$) and $S \otimes M \to B$ ($\sigma \otimes m \mapsto \sigma m$) such that $(m\sigma) n = m(\sigma n)$ for all $m, n \in M$, $\sigma \in S$. Since $M$ is free we can define $x\sigma$ and $\sigma y$ arbitrarily in $B$ for $x, y \in X$ and $\sigma$ a basis element $\xi_1 \xi_2 \cdots \xi_n$ of $S$, and we need only check that
\begin{equation}
(x\sigma) y = x (\sigma y) \label{2}\tag{2}
\end{equation}
in this case. So let $\sigma = \xi_1 \xi_2 \cdots \xi_n$ ($x_1 \leq x_2 \leq \cdots \leq x_n$ in $X$) and let $\xi$, $\eta$ be the images of $x$, $y$ in $S$. We define
\[
x\sigma = x \circ \sigma + \xi\sigma, \qquad \sigma y = \sigma \circ y + \sigma\eta,
\]
where $\xi\sigma$, $\sigma\eta$ are products in $S$ and $x \circ \sigma$, $\sigma \circ y$ are the elements of $A$ defined by
\begin{align*}
x \circ \sigma &= \sum_{x_i < x} x_1 x_2 \cdots x_{i-1} \langle x, x_i \rangle x_{i+1} \cdots x_n,\\
\sigma \circ y &= \sum_{x_i > y} x_1 x_2 \cdots x_{i-1} \langle x_i, y \rangle x_{i+1} \cdots x_n,
\end{align*}
with the convention that $x \circ 1 = 1 \circ y = 0$. (These definitions are just an imitation of the operations in the associative structure $T(M)$ in terms of the splitting $T(M) = K(M) + S(M)$). Since $(\xi\sigma)\eta = \xi(\sigma\eta)$ in $S$, equation \eqref{2} is equivalent to
\begin{equation}
(\xi\sigma) \circ y - x(\sigma \circ y) = x \circ (\sigma\eta) - (x \circ \sigma) y. \label{3}\tag{3}
\end{equation}
\footnote{\textit{Comment by DG:} The following proof of \eqref{3} is rather heavy on the reader's concentration. An alternative proof, found by GPT-5.5, is included in the Appendix below.} To simplify the notation, let $\tau = \xi\sigma\eta$ (product in $S$), and rename $x$, $y$ and $x_1, x_2, \ldots, x_n$ so that $\tau = \eta_1 \eta_2 \cdots \eta_k$ with\footnote{\textit{Comment by DG:} The $\eta_i$ are understood to be images of the $y_i$ in $S(M) = S$.} $y_1 \leq y_2 \leq \cdots \leq y_k$ ($k = n + 2$) and $x = y_r$, $y = y_s$ ($r \neq s$). Then $\sigma = (\eta_1 \eta_2 \cdots \eta_k)_{\widehat{r} \widehat{s}}$, where the subscripts $\widehat{r}$, $\widehat{s}$ denote that the factors with subscripts $r$, $s$ are to be omitted. Also $\xi\sigma = (\eta_1 \eta_2 \cdots \eta_k)_{\widehat{s}}$ and $\sigma\eta = (\eta_1 \eta_2 \cdots \eta_k)_{\widehat{r}}$. Writing\footnote{\textit{Comment by DG:} The ``$p$'' and ``$q$'' in this notation were called ``$r$'' and ``$s$'' in the original. I renamed them to disambiguate them from the $r$ and $s$ already defined.} $\epsilon_{pq} = 0$ if $p \leq q$ and $\epsilon_{pq} = 1$ if $p > q$, the left hand side of \eqref{3} becomes
\begin{align*}
(\xi\sigma) \circ y - x(\sigma \circ y) ={} &\sum_j \epsilon_{js} (y_1 \cdots y_{j-1} \langle y_j, y_s \rangle y_{j+1} \cdots y_k)_{\widehat{s}} \\
&- y_r \sum_{j \neq r} \epsilon_{js} (y_1 \cdots y_{j-1} \langle y_j, y_s \rangle y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}}.
\end{align*}
The terms $j \neq r$ in the first sum appear in the second sum with $y_r$ moved to the left hand end. This move can be accomplished by adding terms containing commutators $y_i y_r - y_r y_i$ and possibly $\langle y_j, y_s \rangle y_r - y_r \langle y_j, y_s \rangle$. We therefore have $(\xi\sigma) \circ y - x(\sigma \circ y) = U + V + W$, where
\begin{align*}
U ={}& \epsilon_{rs} (y_1 \cdots y_{r-1} \langle y_r, y_s \rangle y_{r+1} \cdots y_k)_{\widehat{s}}, \\
V ={}& -\sum_{j \neq r} \sum_{\substack{i \neq s \\ i \neq j}} \epsilon_{js} \epsilon_{ri} (y_1 \cdots y_{i-1} (y_r y_i - y_i y_r) y_{i+1} \cdots y_{j-1} \langle y_j, y_s \rangle y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}}, \\
W ={}& \sum_j \epsilon_{js} \epsilon_{rj} (y_1 \cdots y_{j-1} (\langle y_j, y_s \rangle y_r - y_r \langle y_j, y_s \rangle) y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}}.
\end{align*}
(The notation in $V$ is not meant to imply that $i < j$). Similarly, the right hand side of \eqref{3} is $U' + V' + W'$, where
\begin{align*}
U' ={}& \epsilon_{rs} (y_1 \cdots y_{s-1} \langle y_r, y_s \rangle y_{s+1} \cdots y_k)_{\widehat{r}}, \\
V' ={}& -\sum_{i \neq s} \sum_{\substack{j \neq r \\ j \neq i}} \epsilon_{ri} \epsilon_{js} (y_1 \cdots y_{i-1} \langle y_r, y_i \rangle y_{i+1} \cdots y_{j-1} (y_j y_s - y_s y_j) y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}}, \\
W' ={}& \sum_j \epsilon_{rj} \epsilon_{js} (y_1 \cdots y_{j-1} (y_s \langle y_r, y_j \rangle - \langle y_r, y_j \rangle y_s) y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}}.
\end{align*}
Now $V = V'$ by axiom (L2) in $A$. Also, by axiom (L3),
\begin{align*}
W - W' ={}& \sum_{s < j < r} (y_1 \cdots y_{j-1} (y_j \langle y_s, y_r \rangle - \langle y_s, y_r \rangle y_j) y_{j+1} \cdots y_k)_{\widehat{r}\widehat{s}} \\
={}& \epsilon_{rs} \{ (y_1 \cdots y_{r-1} \langle y_s, y_r \rangle y_{r+1} \cdots y_k)_{\widehat{s}} \\
& \quad - (y_1 \cdots y_{s-1} \langle y_s, y_r \rangle y_{s+1} \cdots y_k)_{\widehat{r}} \} \\
={}& U' - U
\end{align*}
since (L1) implies that $\langle y_s, y_r \rangle = -\langle y_r, y_s \rangle$. Thus equation \eqref{3} is satisfied and $B$ is a $T$-bimodule.

Now for $x, y \in X$ we have
\begin{equation}
x\eta = \xi y = \begin{cases} \xi\eta & \text{if } x \leq y \\ \xi\eta + \langle x, y \rangle & \text{if } x \geq y. \end{cases} \label{4}\tag{4}
\end{equation}
We may therefore define $(x, y) = x\eta = \xi y$ for $x, y \in X$, and extend linearly to the whole of $M$. Then $B$ is an associative structure over $M$ since the equation $(x, y) z = x (y, z)$ is $R$-multilinear and is a special case of \eqref{2} when $x, y, z \in X$. Finally, $(x, y) - (y, x) = x\eta - y\xi = \langle x, y \rangle$ by \eqref{4} when $x, y \in X$, and the equality holds for $x, y \in M$ by linearity.
\end{proof}

\begin{corollary}
If $M$ is a free $R$-module then $K(M)$ is the universal Lie structure over $M$.
\end{corollary}

\begin{proof}
Let $(A, \langle, \rangle)$ be any Lie structure over $M$ and embed $A$ in the associative structure $B$ as above. By Theorem 1 there is a map $\theta : M^2 T(M) \to B$ of $T(M)$-bimodules sending $xy$ to $(x, y)$ for $x, y \in M$. Since $(x, y) - (y, x) = \langle x, y \rangle \in A$, $\theta$ induces a map of $T(M)$-bimodules $K(M) \to A$ sending $xy - yx$ to $\langle x, y \rangle$.
\end{proof}

Using Theorem 2 we now obtain

\begin{corollary}
Every Lie algebra over $R$ whose underlying module is free has the Birkhoff--Witt property.
\end{corollary}

\section*{5. Baer Invariants of Tensor Algebras}

We now introduce two invariants of a module $M$ analogous to the invariants $[F, F]/[F, R]$ and $(R \cap [F, F])/[F, R]$ for a group presented as a quotient $F/R$ of a free group. In Fr\"ohlich's notation \cite{frohlich} they are $D_0 V(T(M))$ and $D_1 U(T(M))$, where $U$ and $V$ are the functors on algebras associated with the variety of commutative algebras: $U(T(M)) = S(M)$, $V(T(M)) = K(M)$. We recall briefly their definition and main properties.

We start with a short exact sequence of $R$-modules
\begin{equation}
0 \to Q \to P \to M \to 0 \label{5}\tag{5}
\end{equation}
with $P$ projective, and we make identifications so that $Q \subset P \subset T(P)$. Then we have a commutative diagram with exact rows and columns
\[
\begin{tikzcd}
& 0 \arrow[d] & 0 \arrow[d] & 0 \arrow[d] & \\
0 \arrow[r] & K(P) \cap \overline{Q} \arrow[r] \arrow[d] & \overline{Q} \arrow[r] \arrow[d] & \overline{Q}^* \arrow[r] \arrow[d] & 0 \\
0 \arrow[r] & K(P) \arrow[r] \arrow[d] & T(P) \arrow[r] \arrow[d] & S(P) \arrow[r] \arrow[d] & 0 \\
0 \arrow[r] & K(M) \arrow[r] \arrow[d] & T(M) \arrow[r] \arrow[d] & S(M) \arrow[r] \arrow[d] & 0 \\
& 0 & 0 & 0 &
\end{tikzcd}
\]
where $\overline{Q}$ is the ideal $T(P) Q T(P)$ of $T(P)$ generated by $Q$, and $\overline{Q}^*$ is its image in $S(P)$\ \ \ \ \footnote{\textit{Comment by DG:} The exactness of the rows is obvious. The exactness of the second and third columns follows from known results (Theorem 32 and Theorem 62 in \href{https://www.cip.ifi.lmu.de/~grinberg/algebra/tensorext.pdf}{Darij Grinberg, \textit{A few classical results on tensor, symmetric and exterior powers}, 17 April 2026}, to name just one reference). The exactness of the first column follows from the exactness of the second.}. Let $W$ be the set of words
\[
w_i(\mathbf{x}) = w_i(x_1, x_2, \ldots, x_n) = x_1 x_2 \cdots x_{i-1} [x_i, x_{i+1}] x_{i+2} \cdots x_n
\]
($n \geq 2$, $1 \leq i \leq n - 1$), where $[a, b]$ denotes, as always from now on, the additive commutator $ab - ba$.
If the $x_j$ take values running through $P$ then $W$ generates $K(P)$ as additive group. Let $W'$ be the set of derived words $w_i'(\mathbf{x}, \mathbf{y}) = w_i(\mathbf{x} + \mathbf{y}) - w_i(\mathbf{x})$.
If the $x_j$ run through $P$ and the $y_j$ run through $Q$, $W'$ generates (as additive group) an $R$-module $Z$.
Clearly the image of any such element $w_i'(\mathbf{p}, \mathbf{q})$ in $T(M)$ or in $S(P)$ is $0$, so $Z \subset K(P) \cap \overline{Q}$, and it is not difficult to check that in fact $Z$ is the ideal of $T(P)$ generated by all $[t, q]$ with $t \in T(P)$ and $q \in \overline{Q}$\ \ \ \ \footnote{\textit{Comment by DG:} In the original: ``$Z = [T(P), \overline{Q}]$, the ideal of $T(P)$ generated by all $[t, q]$ ($t \in T(P)$, $q \in \overline{Q}$)''. But the notation $[T(P), \overline{Q}]$ for this is rather nonstandard; usually it would mean just the $R$-linear span of all commutators $[t, q]$ with $t \in T(P)$ and $q \in \overline{Q}$, and this span does not equal $Z$. \par
Here is a brief outline of the proof:
It is easy to see from the definition that
\[
Z = \operatorname{span}\{w_i(x_1, x_2, \ldots, x_n)\ \mid\ \text{each } x_j \text{ belongs to } P \text{, and at least one } x_j \text{ belongs to } Q\}.
\]
(Indeed, this is perhaps the better definition of $Z$.)
This shows that $Z$ is an ideal of $T(P)$.
Now, let $\widetilde{Z}$ be the ideal of $T(P)$ generated by all $[t, q]$ with $t \in T(P)$ and $q \in \overline{Q}$.
We must prove that $Z = \widetilde{Z}$.
We shall achieve this by proving both $\widetilde{Z} \subseteq Z$ and $Z \subseteq \widetilde{Z}$:
\begin{enumerate}
\item To prove $\widetilde{Z} \subseteq Z$, we need to show that every $t \in T(P)$ and $q \in \overline{Q}$ satisfy $[t, q] \in Z$
(since $Z$ is an ideal of $T(P)$). By linearity, we can assume that $t = x_1 x_2 \cdots x_n$ with $x_1, x_2, \ldots, x_n \in P$, and that $q = a q' b$ with $a = y_1y_2 \cdots y_k \in T(P)$ and $b = y_{k+1}y_{k+2} \cdots y_\ell \in T(P)$ (where $y_1, y_2, \ldots, y_\ell \in P$) and $q' \in Q$. But then we have
\[
[t, q] = [t, aq'b] = [t,a] q'b + a [t,q'] b + aq' [t,b]
\]
and
\[
[t, q'] = [x_1 x_2 \cdots x_n, q'] = \sum_i x_1 x_2 \cdots x_{i-1} [x_i, q'] x_{i+1} x_{i+2} \cdots x_n.
\]
Substituting the latter equality into the former, we obtain
\[
[t, q] = [t,a] q'b + a \left(\sum_i x_1 x_2 \cdots x_{i-1} [x_i, q'] x_{i+1} x_{i+2} \cdots x_n\right) b + aq' [t,b].
\]
All addends on the right hand side (once expanded) are of the form $w_i(x_1, x_2, \ldots, x_n)$ where each $x_j$ belongs to $P$ and at least one $x_j$ belongs to $Q$.
Thus, they belong to $Z$. Hence, $[t, q] \in Z$. So we have shown that $\widetilde{Z} \subseteq Z$.
\item In order to prove the reverse inclusion $Z \subseteq \widetilde{Z}$, we must show that each $w_i(x_1, x_2, \ldots, x_n)$ with all $x_j \in P$ and at least one $x_j \in Q$ belongs to $\widetilde{Z}$.
This is clear if the one $x_j$ that belongs to $Q$ is $x_i$ or $x_{i+1}$, because in this case the $[x_i, x_{i+1}]$ bracket in the middle of $w_i(x_1, x_2, \ldots, x_n)$ itself has the form $[t, q]$ with $t \in T(P)$ and $q \in \overline{Q}$ (actually, $q = x_j \in Q$).
In all other cases, we first move this one $x_j$ to a position either just left or just right of the $[x_i, x_{i+1}]$ bracket by successively commuting it past the other $x_k$'s (the commutators will lie in $\widetilde{Z}$), and then use $x_j [x_i, x_{i+1}] = [x_j x_i, x_{i+1}] - [x_j, x_{i+1}] x_i$ (if $x_j$ ended up left of the bracket) or $[x_i, x_{i+1}] x_j = [x_i, x_{i+1} x_j] - x_{i+1} [x_i, x_j]$ (if on the right), obtaining a right hand side belonging to $\widetilde{Z}$ in both cases (since $x_j$, $x_j x_i$ and $x_{i+1} x_j$ all belong to $\overline{Q}$).
\end{enumerate}
}.
We write
\[
B(M) = (K(P) \cap \overline{Q})/Z
\qquad \text{ and } \qquad
C(M) = K(P)/Z .
\]
(These are respectively $D_1 U T(M)$ and $D_0 V T(M)$).

Now $K(P)$ and $\overline{Q}$ and $Z$ are $T(P)$-bimodules and satisfy\footnote{\textit{Comment by DG:} Added ``and $Z$'' as well as a few filler words (to avoid overfull hbox).} $K(P) \overline{Q} + \overline{Q} K(P) \subset Z$ (because e.g.\ $p_1 p_2 \cdots [p_i, p_{i+1}] \cdots q \cdots p_n$ is of the form $w_i'(\mathbf{p}, \mathbf{q})$). Hence $B(M)$ and $C(M)$ are $T(M)$-bimodules, and we have an exact sequence of $T(M)$-bimodules:
\begin{equation}
0 \to B(M) \to C(M) \to K(M) \to 0. \label{6}\tag{6}
\end{equation}

As the notation suggests, $B(M)$ and $C(M)$ depend only on $M$ and not on its presentation \eqref{5}. To see this directly, let $0 \to Q' \to P' \to M \to 0$ be another presentation of $M$ with $P'$ projective. Then there are $R$-linear maps $\sigma$, $\sigma_*$ making the following diagram commute:
\[
\begin{tikzcd}
0 \arrow[r] & Q \arrow[r] \arrow[d, "\sigma_*"] & P \arrow[r] \arrow[d, "\sigma"] & M \arrow[r] \arrow[d, "1_M"] & 0 \\
0 \arrow[r] & Q' \arrow[r] & P' \arrow[r] & M \arrow[r] & 0.
\end{tikzcd}
\]
The map $T(\sigma) : T(P) \to T(P')$ is an algebra homomorphism and sends $\overline{Q}$ into $\overline{Q'}$. Also, since $K(P)$ and $Z$ are defined by algebra words, $T(\sigma)$ sends $K(P)$ into $K(P')$ and $Z$ into $Z'$. Hence $\sigma$ induces maps $B(M) \to B'(M)$ and $C(M) \to C'(M)$ (in the obvious notation). Similarly, there is a map $\tau : P' \to P$ inducing maps in the opposite direction. It is enough, therefore, to show that if $\rho : P \to P$ induces the identity map on $M$ then it induces the identity map on $B(M)$ and on $C(M)$. But this is clear since if $u \in K(P)$ then $u$ is a sum of elements $w_i(\mathbf{p})$ ($p_j \in P$), and so $u\rho - u = \sum (w_i(\mathbf{p}\rho) - w_i(\mathbf{p})) \in Z$ (because $p_j \rho - p_j \in Q$). A similar argument with $1_M$ replaced by an arbitrary map of $R$-modules shows that $B$ and $C$ are functors from $R$-modules to $R$-modules\footnote{\textit{Comment by DG:} Let me expand upon this somewhat.
Let me show that $B$ is a functor (the proof for $C$ is analogous).
Each $R$-module $M$ has a canonical presentation $0 \to Q_M \to P_M \to M \to 0$, where $P_M$ is the free $R$-module with $M$ as a basis, where $P_M \to M$ is the canonical $R$-module epimorphism that sends each basis element corresponding to some $m \in M$ to the respective $m$ itself, and where $Q_M$ is the kernel of this epimorphism.
We can define $B(M)$ canonically (not just up to isomorphism) using this presentation.
To show that this gives a functor, we define $B(f) : B(M) \to B(N)$ for any $R$-linear map $f : M \to N$ as follows:
Lift $f$ to an $R$-linear map $P_f : P_M \to P_N$ (this lift is not canonical, but exists because $P_M$ is free and thus projective). Then, $P_f$ sends $Q_M$ to $Q_N$, and thus we obtain a commutative diagram
\[
\xymatrix{
0 \ar[r] & Q_M \ar[r] \ar[d]_{Q_f} & P_M \ar[r] \ar[d]_{P_f} & M \ar[r] \ar[d]_f & 0 \\
0 \ar[r] & Q_N \ar[r] & P_N \ar[r] & N \ar[r] & 0.
}
\]
This leads to an algebra homomorphism $T(P_f) : T(P_M) \to T(P_N)$ that sends $K(P_M)$ to $K(P_N)$, sends $Z_M$ to $Z_N$, and sends $\overline{Q}_M$ to $\overline{Q}_N$, and thus gives rise to an $R$-linear map $B(f) : B(M) \to B(N)$.
It remains to show that this $B(f)$ depends only on $f$ and not on its lift $P_f$ (functoriality will then follow immediately, since $T(P_f)$ is functorial in $P_f$).
But this is clear since, if $P_{f1}$ and $P_{f2}$ are two candidates for the lift $P_f$, then each element $u \in K(P_M)$ is a sum of elements $w_i(\mathbf{p})$ with $p_j \in P_M$, and so $u\, T(P_{f1}) - u \,T(P_{f2}) = \sum (w_i(\mathbf{p}\, P_{f1}) - w_i(\mathbf{p}\, P_{f2})) \in Z_N$ (because $p_j\, P_{f1} - p_j\, P_{f2} \in Q_N$).}.

We now show that $C(M) = K(P)/Z$ is a Lie structure over $M$ in a natural way. We have already shown that $C(M)$ is a $T(M)$-bimodule. We also know that $K(P)$ is a Lie structure over $P$ with respect to the operation $[x, y] = xy - yx$ ($x, y \in P$). Suppose that $x \equiv x' \pmod{Q}$ and $y \equiv y' \pmod{Q}$. Then $[x, y] \equiv [x', y'] \pmod{Z}$, so for $\xi, \eta \in M$ we may define $[\![\xi, \eta]\!] \in C(M) = K(P)/Z$ to be the image in $C(M)$ of $[x, y]$, where $x, y \in P$ have images $\xi, \eta$ in $M$. The axioms for a Lie structure hold in $C(M)$ over $M$ because they hold in $K(P)$ over $P$.

\begin{theorem}
For any $R$-module $M$, $(C(M), [\![,]\!])$ is the universal Lie structure over $M$.
\end{theorem}

\begin{proof}
Let $(A, \langle, \rangle)$ be any Lie structure over $M$. In constructing $C(M)$ we may choose $P$ to be a free $R$-module, and in this case we know (Theorem 3, Corollary 1) that $(K(P), [,])$ is the universal Lie structure over $P$. Now $A$ can be viewed as a Lie structure over $P$ \textit{via} the map $\theta : P \to M$ of the presentation, so there is a unique map $\alpha : K(P) \to A$ of $T(P)$-bimodules sending $[x, y]$ to $\langle x\theta, y\theta \rangle$ for all $x, y \in P$. If $x_1, x_2, \ldots, x_n \in P$ then
\begin{align*}
w_i(x_1, x_2, \ldots, x_n) \alpha &= (x_1 \cdots x_{i-1} [x_i, x_{i+1}] x_{i+2} \cdots x_n) \alpha \\
&= (x_1 \theta) \cdots (x_{i-1} \theta) \langle x_i \theta, x_{i+1} \theta \rangle (x_{i+2} \theta) \cdots (x_n \theta).
\end{align*}
Hence, for $q_1, q_2, \ldots, q_n \in Q$, $(w_i(\mathbf{x} + \mathbf{q}) - w_i(\mathbf{x})) \alpha = 0$, i.e.\ $Z \subset \operatorname{Ker} \alpha$. Thus $\alpha$ induces a map $\beta : C(M) = K(P)/Z \to A$\ \ \ \ \footnote{\textit{Comment by DG:} The original says ``$\beta : C(M) = K(M) / Z \to A$''. Claude corrected this autonomously.} sending $[\![\xi, \eta]\!]$ to $\langle \xi, \eta \rangle$ for all $\xi, \eta \in M$. It is easy to see that $\beta$ is a morphism of $T(M)$-bimodules, and it is unique since the $[x, y]$ generate $K(P)$ as $T(P)$-bimodule and therefore the $[\![\xi, \eta]\!]$ generate $C(M)$ as $T(M)$-bimodule.
\end{proof}

The canonical maps $C(M) \to L(M)$ given by this theorem form a natural equivalence of functors $C \simeq L$. We may therefore write $L(M)$ for $C(M)$ from now on, and we have the exact sequence
\begin{equation}
0 \to B(M) \to L(M) \to K(M) \to 0 \label{6b}\tag{6$'$}
\end{equation}
for all $R$-modules $M$. Clearly this gives an exact sequence of functors $0 \to B \to L \to K \to 0$, and combining it with $0 \to K \to T \to S \to 0$ we obtain the exact sequence of functors
\begin{equation}
0 \to B \to L \to T \to S \to 0. \label{7}\tag{7}
\end{equation}

\begin{theorem}
For any $R$-module $M$ the following are equivalent:
\begin{itemize}
\item[\textrm{(i)}] $B(M) = 0$;
\item[\textrm{(ii)}] $(K(M), [,])$ is the universal Lie structure over $M$;
\item[\textrm{(iii)}] every Lie structure over $M$ is embeddable in an associative structure over $M$.
\end{itemize}
\end{theorem}

\begin{proof}
The equivalence of (i) and (ii) follows from the exact sequence \eqref{6b} (which is a consequence of Theorem 4).

(ii) $\Rightarrow$ (iii). Let $(A, \langle, \rangle)$ be any Lie structure over $M$ and let $A_0$ be the $T(M)$-bimodule generated in $A$ by the elements $\langle x, y \rangle$ ($x, y \in M$). If (ii) holds then there is a unique morphism $\alpha : K(M) \to A$ of $T(M)$-bimodules sending $[x, y]$ to $\langle x, y \rangle$. The kernel $D$ of $\alpha$ is a $T(M)$-bimodule, i.e.\ an ideal of $T(M)$. The algebra $A^* = T(M)/D$ is an associative structure over $M$ and the Lie structure $A_0 \cong K(M)/D$ is embedded in it. To extend this to an embedding of $A$ itself is a trivial matter. Any $T(M)$-bimodule containing $A^*$ is also an associative structure over $M$, so we need only form the fibre coproduct of $A$ and $A^*$ with respect to the embeddings $A_0 \to A$ and $A_0 \to A^*$. It is clear that $\langle x, y \rangle$ goes to $xy - yx$ in the resulting embedding of $A$.

(iii) $\Rightarrow$ (ii). If (iii) holds then the universal Lie structure $L = (L(M), \langle, \rangle)$ is embeddable in an associative structure $(L^*, (,))$ so that $(x, y) - (y, x) = \langle x, y \rangle$. By Theorem 1, there is a morphism of $T(M)$-bimodules $\alpha : M^2 T(M) \to L^*$ sending $xy$ to $(x, y)$, and this induces a morphism $\beta : K(M) \to L$ sending $xy - yx$ to $\langle x, y \rangle$. Clearly $\beta$ is inverse to the canonical map $L(M) \to K(M)$, so $K(M) \cong L(M)$.
\end{proof}

\section*{6. Modules with $B(M) = 0$}

Our main result is an immediate consequence of Theorems 2 and 5:

\begin{theorem}
If $B(M) = 0$ for the $R$-module $M$ then the Birkhoff--Witt theorem holds for all Lie algebras over $R$ with underlying module $M$.
\end{theorem}

To show that this theorem contains the known results quoted in the introduction we now look for conditions on the $R$-module $M$ which ensure that $B(M) = 0$.

\begin{theorem}
Let $R$ be a fixed commutative ring and let $M$ be any $R$-module.
\begin{itemize}
\item[(i)] If $M$ is $R$-projective then $B(M) = 0$.
\item[(ii)] If $M$ is uniquely divisible as Abelian group (i.e.\ $M$ is a rational vector space) then $B(M) = 0$.
\item[(iii)] If $M$ is a direct sum of cyclic (i.e.\ one-generator) modules then $B(M) = 0$.
\end{itemize}
\end{theorem}

\begin{proof}
Let $0 \to Q \to P \to M \to 0$ be a presentation of $M$ with $P$ projective. Then, in the notation of Section 5, $B(M) = (K(P) \cap \overline{Q})/Z$. Item (i) is clear since if $M$ is projective we may take $P = M$ and $Q = 0$. To prove (ii) and (iii) we first observe that $K(P)$, $\overline{Q}$ and $Z$ are homogeneous ideals of $T(P)$, so it is enough to take $u \in K(P) \cap \overline{Q}$ homogeneous of degree $n$ ($n \geq 2$) and show that $u \in Z$. Now the symmetric group $\mathscr{S}_n$ acts on the homogeneous part $\otimes^n P$ of $T(P)$, and if $\pi \in \mathscr{S}_n$, $u \in \otimes^n P$, then $u - u\pi \in K(P)$. Moreover, if $u \in \overline{Q}$, then $u - u\pi$ is a sum of elements of type $p_1 p_2 \cdots p_{i-1} [p_i, q] p_{i+2} \cdots p_n$ or $p_1 \cdots p_{i-1} [p_i, p_{i+1}] p_{i+2} \cdots q \cdots p_n$, where $p_j \in P$ and $q \in Q$. All such elements are in $Z$, by definition, so $u - u\pi \in Z$ whenever $u \in \overline{Q} \cap \otimes^n P$. On the other hand, if $u \in K(P)$ and is homogeneous of degree $n$ then $u$ is a sum of elements of the form $v - v\tau$, where $\tau \in \mathscr{S}_n$ is a transposition. Hence $\sum_{\pi \in \mathscr{S}_n} u\pi = \sum_{\pi \in \mathscr{S}_n} (v\pi - v\tau\pi) = 0$ in this case. Thus, for any $u \in \overline{Q} \cap K(P) \cap \otimes^n P$, we have $n! u = \sum_{\pi \in \mathscr{S}_n} (u - u\pi) \in Z$. This shows that $B(M)$ is always a torsion group. It is graded by degree: $B(M) = \bigoplus B^n(M)$, and $n! B^n(M) = 0$.

Suppose now that $M$ is uniquely divisible. Then for each integer $k > 0$ we have an isomorphism $k : M \to M$ ($x \mapsto kx$). Since $B$ is a functor this induces an isomorphism $B(k) : B(M) \to B(M)$ which in dimension $n$ is multiplication by $k^n$. Taking $k = n!$ we see that $B^n(M) = (n!)^n B^n(M) = 0$\ \ \ \ \footnote{\textit{Comment by DG:} Claude fixed a typo here (``$B^n(m)$'').}, which proves (ii).

To prove (iii) we suppose that $M = \bigoplus_{x \in X} M_x$, where $M_x = R/I_x$ is cyclic, and we take $P$ to be the free $R$-module on $X$ with the obvious map $P \to M$. Then $Q$ is spanned by certain elements of the form $\lambda x$, where $\lambda \in R$ and $x \in X$. We take a fixed total ordering $\leq$ of $X$ and denote by $S^*$ the $R$-submodule of $T(P)$ spanned by all products $x_1 x_2 \cdots x_n$ with $x_i \in X$ and $x_1 \leq x_2 \leq \cdots \leq x_n$ ($n \geq 0$). Then there is an $R$-linear map $\theta : T(P) \to S^*$ which sends any product of $x$'s to\footnote{\textit{Comment by DG:} In the original ``to be''.} the product of the same $x$'s in correct order. The kernel of $\theta$ is exactly $K(P)$. Suppose now that $u \in \overline{Q} \cap \otimes^n P$. Because of the special form of our presentation we have $u = u_1 + u_2 + \cdots + u_k$ where each $u_i$ is still in $\overline{Q}$ and is of the form $\lambda x_1 x_2 \cdots x_n$ with $\lambda \in R$ and $x_1, x_2, \ldots, x_n \in X$. Then $u_i \theta = u_i \pi_i$ for some $\pi_i \in \mathscr{S}_n$, so $u_i - u_i \theta = u_i - u_i \pi_i \in Z$, as we have already shown. Hence $u - u\theta \in Z$. If now $u \in \overline{Q} \cap K(P) \cap \otimes^n P$ then $u\theta = 0$, and we have $u \in Z$ as required.
\end{proof}

\begin{corollary*}
If $R$ is the direct sum of a finite number of fields or is an algebra over the rationals then $B(M) = 0$ for all $R$-modules $M$. If $R$ is a principal ideal domain then $B(M) = 0$ for all finitely generated $R$-modules $M$.
\end{corollary*}

We can extend this last result by general arguments as follows.

\begin{theorem}
If $\{M_\alpha\}$ is a directed system of $R$-modules, and $M = \varinjlim M_\alpha$, then $B(M) = \varinjlim B(M_\alpha)$.
\end{theorem}

\begin{proof}
The exact sequence of functors \eqref{7} gives rise to a directed system of exact sequences
\[
0 \to B(M_\alpha) \to L(M_\alpha) \to T(M_\alpha) \to S(M_\alpha) \to 0.
\]
Since $\varinjlim$ is an exact functor for $R$-modules, we obtain a commutative diagram
\[
\begin{tikzcd}[column sep=small]
0 \arrow[r] & \varinjlim B(M_\alpha) \arrow[r] \arrow[d] & \varinjlim L(M_\alpha) \arrow[r] \arrow[d, "\lambda"] & \varinjlim T(M_\alpha) \arrow[r] \arrow[d, "\tau"] & \varinjlim S(M_\alpha) \arrow[r] \arrow[d, "\sigma"] & 0 \\
0 \arrow[r] & B(M) \arrow[r] & L(M) \arrow[r] & T(M) \arrow[r] & S(M) \arrow[r] & 0
\end{tikzcd}
\]
with exact rows, the limits in the upper row being taken in the category of $R$-modules. It is enough, therefore, to show that $\lambda$, $\tau$, $\sigma$ are isomorphisms. We give the proof for $\lambda$; the other cases are proved by similar ``general nonsense'' and are in any case well known.

$L_\alpha = L(M_\alpha)$ is the universal Lie structure over $M_\alpha$. Its structure is given by canonical maps $M_\alpha \otimes M_\alpha \to L_\alpha$, $L_\alpha \otimes M_\alpha \to L_\alpha$ and $M_\alpha \otimes L_\alpha \to L_\alpha$ satisfying axioms (L1), (L2), (L3). If $\theta : M_\alpha \to M_\beta$ is $R$-linear then $\theta^* = L(\theta) : L_\alpha \to L_\beta$ is obtained by viewing $L_\beta$ as a Lie structure over $M_\alpha$ \textit{via} the map $\theta$. It is therefore not only $R$-linear but is compatible with the structure maps, that is, for $x, y \in M_\alpha$ and $a \in L_\alpha$ we have\footnote{\textit{Comment by DG:} Missing subscripts inserted in ``$x, y \in M$ and $a \in L$''.} $\langle x, y \rangle \theta^* = \langle x\theta, y\theta \rangle$, $(ax) \theta^* = (a\theta^*)(x\theta)$ and $(xa) \theta^* = (x\theta)(a\theta^*)$. Hence, writing $\Lambda = \varinjlim L_\alpha$, the structure maps of the various $L_\alpha$ induce maps
\[
M \otimes M = \varinjlim (M_\alpha \otimes M_\alpha) \to \Lambda, \qquad \Lambda \otimes M = \varinjlim (L_\alpha \otimes M_\alpha) \to \Lambda
\]
and $M \otimes \Lambda \to \Lambda$. The axioms (L1), (L2), (L3) carry over to the limits since each axiom involves only a finite number of symbols and therefore each instance of it is implied by the corresponding axiom for some pair $M_\alpha, L_\alpha$. Thus $\Lambda$ is in a canonical way a Lie structure over $M$. If now $A$ is any Lie structure over $M$ then $A$ can be viewed as a Lie structure over $M_\alpha$. Hence there is a unique morphism of Lie structures $L_\alpha \to A$ for each $\alpha$. These induce a unique morphism $\Lambda \to A$ of Lie structures over $M$ which, in the particular case $A = L(M)$, is the map $\lambda$. The standard argument for universal objects now shows that $\lambda$ is an isomorphism.
\end{proof}

\begin{corollary*}
If $R$ is a principal ideal domain then $B(M) = 0$ for all $R$-modules $M$.
\end{corollary*}

Finally, we consider change-of-ring arguments. If $R'$ is a commutative $R$-algebra and $M$ is an $R$-module then $M' = M \otimes_R R'$ is an $R'$-module and we may form its Baer invariant as such. We write $B_{R'}(M')$ to indicate that we are calculating with $R'$-modules.

\begin{theorem}
If the $R$-algebra $R'$ is flat over $R$ then $B_{R'}(M \otimes_R R') = B_R(M) \otimes_R R'$.
\end{theorem}

\begin{proof}
The argument is similar to the one given for direct limits. Since $R'$ is flat over $R$ we have, for any $R$-module $M$, an exact sequence of $R'$-modules
\[
0 \to B_R(M) \otimes_R R' \to L_R(M) \otimes_R R' \to T_R(M) \otimes_R R' \to S_R(M) \otimes_R R' \to 0.
\]
We write $M' = M \otimes_R R'$, $L = L_R(M)$, $L' = L_R(M) \otimes_R R'$. The structure maps for $L$, namely the maps\footnote{\textit{Comment by DG:} Added ``namely the maps'' to avoid a false impression that had me confused too.} $M \otimes_R M \to L$, $L \otimes_R M \to L$, $M \otimes_R L \to L$, induce $R'$-linear maps $M' \otimes_{R'} M' \to L'$ etc.\ which clearly make $L'$ a Lie structure over $M'$, and it is easy to check that $L'$ is then the universal Lie structure $L_{R'}(M')$. Similarly, we may identify $T_R(M) \otimes_R R'$ with $T_{R'}(M')$ and $S_R(M) \otimes_R R'$ with $S_{R'}(M')$, and the theorem follows.
\end{proof}

In particular, the local ring $R_\mathfrak{p}$ at a prime ideal $\mathfrak{p}$ of $R$ is flat over $R$ (see, for example, Nagata \cite{nagata}, (6.18) on\footnote{\textit{Comment by DG:} Added ``(6.18) on'' to avoid relying on page numbers.} p.~19). Writing $M_\mathfrak{p}$ for $M \otimes_R R_\mathfrak{p}$ we therefore have the following.

\setcounter{corollary}{0}
\begin{corollary}
For any $R$-module $M$ and any prime ideal $\mathfrak{p}$ of $R$, $B_{R_\mathfrak{p}}(M_\mathfrak{p}) = (B_R(M))_\mathfrak{p}$. Hence $B_R(M) = 0$ if and only if $B_{R_\mathfrak{p}}(M_\mathfrak{p}) = 0$ for all prime ideals (or all maximal ideals) $\mathfrak{p}$.
\end{corollary}

Since the local rings of a Dedekind domain are principal ideal domains, the corollary to Theorem 8 now gives

\begin{corollary}
If $R$ is a Dedekind domain then $B(M) = 0$ for all $R$-modules $M$.
\end{corollary}


\bigskip

\appendix

% \section*{Appendix 1 (mostly by GPT-5.5). A cleaner proof of equation \eqref{3}}

% This appendix gives a less index-heavy proof of equation \eqref{3} from Section~4.
% We keep the notation used there.  Thus $X$ is a totally ordered basis of $M$,
% and $x_1 \leq x_2 \leq \cdots \leq x_n$ are $n$ elements of $X$, and
% $\sigma=\xi_1\xi_2\cdots\xi_n$ is a basis monomial of $S(M)$ with
% each $\xi_i$ being the image of $x_i$ in $S(M)$.
% The elements $x\circ\sigma$ and $\sigma\circ y$ are those defined just before
% \eqref{3}.

% We shall use a normal-ordering argument, using the language of rewrite
% systems \cite{baadernipkow, newman}.  If $w=z_1z_2\cdots z_m$ is a word
% in letters from $X$, then the \textit{nondecreasing rearrangement} of $w$ means
% the unique word
% \[
        % w^\uparrow=z'_1z'_2\cdots z'_m
% \]
% that is obtained from $w$ by permuting the letters (i.e., the letters of
% $w^\uparrow$ are the same as those of $w$, counted with multiplicity) and
% which satisfies
% \[
        % z'_1\leq z'_2\leq\cdots\leq z'_m.
% \]
% Thus $w^\uparrow$ is obtained by sorting the letters of $w$ into nondecreasing
% order.  This can be done by repeated adjacent interchanges
% \begin{align}
        % ab \longmapsto ba \qquad (a>b).
        % \label{8} \tag{8}
% \end{align}
% The elementary fact that the symmetric group is generated by the adjacent
% transpositions $s_i=(i,i+1)$ explains why adjacent interchanges are sufficient;
% more precisely, the usual Coxeter presentation of the symmetric group gives the
% commutation and braid moves below (see, for example, \cite[Proposition (SYM.2.6)]{llpt}).
% Equivalently, this is the finite terminating rewriting system whose rewriting
% rule is $\ldots ab \ldots \to \ldots ba \ldots$ ($a>b$) and whose normal form
% is $w^\uparrow$; the use of local diamonds is an instance of Newman's lemma, or
% of the diamond lemma, in this elementary situation \cite{newman}.

% We now spell out the ``correction'' attached to such a sorting process.  Let
% \[
        % W_0 \to W_1 \to \cdots \to W_r
% \]
% be a sorting sequence from $w$ to $w^\uparrow$ (that is, a sequence
% of words in letters from $X$ such that $W_0 = w$ and $W_r = w^\uparrow$
% and such that each $W_\ell$ is obtained from $W_{\ell-1}$ by an
% adjacent interchange of the form \eqref{8}).  Suppose that the
% $\ell$-th step has the form
% \[
        % W_{\ell-1}=p_\ell a_\ell b_\ell q_\ell
        % \to
        % W_\ell=p_\ell b_\ell a_\ell q_\ell,
        % \qquad a_\ell>b_\ell,
% \]
% where $p_\ell$ and $q_\ell$ are possibly empty words in letters from $X$.
% The \textit{correction} contributed by this single interchange is
% defined to be
% \[
        % p_\ell\langle a_\ell,b_\ell\rangle q_\ell \in A.
% \]
% The \textit{total correction} of the sorting sequence is the sum
% \[
        % C(W_0\to\cdots\to W_r)
        % :=\sum_{\ell=1}^r p_\ell\langle a_\ell,b_\ell\rangle q_\ell \in A.
% \]
% Here the words $p_\ell$ and $q_\ell$ act on the element
% $\langle a_\ell,b_\ell\rangle$ by the given $T(M)$-bimodule structure on $A$.
% Thus each summand is obtained from the current word by replacing the adjacent
% inverted pair being swapped by the corresponding bracket.

% For example, if $a<b<c$ in $X$ and $w=cba$, then $w^\uparrow=abc$.  The sorting
% sequence
% \[
        % cba\to bca\to bac\to abc
% \]
% has total correction
% \[
        % \langle c,b\rangle a+b\langle c,a\rangle+\langle b,a\rangle c,
% \]
% whereas the sorting sequence
% \[
        % cba\to cab\to acb\to abc
% \]
% has total correction
% \[
        % c\langle b,a\rangle+\langle c,a\rangle b+a\langle c,b\rangle.
% \]
% The equality of these two corrections is exactly the braid-type local identity
% proved below from (L3).

% We claim that the total correction of a sorting sequence
% from $w$ to $w^\uparrow$ is independent of the chosen sorting
% sequence.  By the Newman/diamond-lemma principle just recalled, we know
% that any two sorting sequences from $w$ to $w^\uparrow$ can be transformed
% into each other by a sequence of commutation moves (i.e., swapping two
% consecutive sorting steps that involve completely disjoint pairs of
% adjacent entries) and braid moves (i.e., replacing a subsequence of the
% form $\ldots abc\ldots \to \ldots bac\ldots \to \ldots bca\ldots \to \ldots cba\ldots $
% with $a>b>c$ by $\ldots abc\ldots \to \ldots acb\ldots \to \ldots cab\ldots \to \ldots cba\ldots $).
% Thus, it suffices to check that each of these moves leaves the total
% correction unchanged.

% First consider two disjoint interchanges, in a word of the form
% \[
        % pabtuvq
% \]
% with $a>b$ and $u>v$, where $p,t,q$ are possibly empty words.  Interchanging
% $ab$ first and $uv$ second gives total correction
% \[
        % p\langle a,b\rangle tuvq+pbat\langle u,v\rangle q,
% \]
% whereas doing the interchanges in the opposite order gives total correction
% \[
        % pabt\langle u,v\rangle q+p\langle a,b\rangle tvuq.
% \]
% These two expressions are equal by (L2), since their difference is
% \[
        % p\left(\langle a,b\rangle t(uv-vu)-(ab-ba)t\langle u,v\rangle\right)q.
% \]

% Next consider the braid move, in a word of the form $pabcq$ with $a>b>c$.  The
% sorting order
% \[
        % abc\to bac\to bca\to cba
% \]
% gives total correction
% \[
        % p\langle a,b\rangle cq+pb\langle a,c\rangle q
        % +p\langle b,c\rangle aq,
% \]
% whereas the sorting order
% \[
        % abc\to acb\to cab\to cba
% \]
% gives total correction
% \[
        % pa\langle b,c\rangle q+p\langle a,c\rangle bq
        % +pc\langle a,b\rangle q.
% \]
% These two expressions are equal by (L3), since (L1) and bilinearity imply
% $\langle c,a\rangle=-\langle a,c\rangle$.  Thus the total correction is
% well-defined; from now on we denote it simply by $C(w)$.

% Now apply this to the word
% \[
        % w=xx_1x_2\cdots x_ny.
% \]
% There are two natural ways to sort this word.  First sort the subword
% $xx_1x_2\cdots x_n$, and only afterwards insert $y$ into the resulting sorted
% word.  Moving $x$ through the already sorted word $x_1x_2\cdots x_n$ produces
% exactly the total correction $x\circ\sigma$, and these correction terms still
% have to be followed by $y$.  Then inserting $y$ into the sorted word representing
% $\xi\sigma$ produces the total correction $(\xi\sigma)\circ y$.  Thus this
% sorting process gives total correction
% \[
        % (x\circ\sigma)y+(\xi\sigma)\circ y.
% \]

% Alternatively, first sort the subword $x_1x_2\cdots x_ny$, and only afterwards
% insert $x$ into the resulting sorted word.  The first step produces the total
% correction $\sigma\circ y$, now preceded by $x$, and the second produces the
% total correction $x\circ(\sigma\eta)$.  Thus this sorting process gives total
% correction
% \[
        % x(\sigma\circ y)+x\circ(\sigma\eta).
% \]

% The independence of the total correction therefore gives
% \[
        % (x\circ\sigma)y+(\xi\sigma)\circ y
        % =x(\sigma\circ y)+x\circ(\sigma\eta).
% \]
% Rearranging this equality gives precisely
% \[
        % (\xi\sigma) \circ y - x(\sigma \circ y)
        % =x \circ (\sigma\eta) - (x \circ \sigma)y,
% \]
% which is equation \eqref{3}.

\section*{Appendix 1 (by DG). A cleaner proof of equation \eqref{3}}

\begin{fineprint}

This appendix gives a less index-heavy proof of equation \eqref{3} from
Section~4. This proof was suggested by GPT-5.5, and rewritten by myself in a
more elementary language. It was originally inspired by the theory of rewrite
systems \cite{baadernipkow, newman}.

We keep the notation from the proof of Theorem 3. Thus $X$ is a totally
ordered basis of the free $R$-module $M$, and we have defined elements
$x\circ\sigma$ and $\sigma\circ y$ of $A$ for all $x,y\in X$ and all basis
monomials $\sigma=\xi_{1}\xi_{2}\cdots\xi_{n}$ of $S\left(  M\right)  $.

A \emph{word} shall mean a finite list of elements of $X$. We shall write a
word $\left(  w_{1},w_{2},\ldots,w_{n}\right)  $ as $w_{1}w_{2}\cdots w_{n}$,
thus visually identifying it with the basis element of $T\left(  M\right)  $
it corresponds to.

The \emph{inversions} of a word $w_{1}w_{2}\cdots w_{n}$ are the pairs
$\left(  i,j\right)  $ of integers satisfying $1\leq i<j\leq n$ and
$w_{i}>w_{j}$. In other words, they are the pairs of positions in the word
such that the earlier position contains a (strictly) larger letter than the
later one. We let $\operatorname*{inv}w$ denote the number of inversions of a
word $w=w_{1}w_{2}\cdots w_{n}$. This is a nonnegative integer.

If $w=w_{1}w_{2}\cdots w_{n}$ is a word and $1\leq i<n$ an integer satisfying
$w_{i}>w_{i+1}$, then we can swap the $i$-th and $\left(  i+1\right)  $-st
letters of $w$ to obtain a new word%
\[
w^{\curvearrowright i}:=\left(  w_{1}w_{2}\cdots w_{i-1}\right)  w_{i+1}w_{i}\left(
w_{i+2}w_{i+3}\cdots w_{n}\right)  ,
\]
which has one fewer inversion than $w$; that is,
\begin{equation}
\operatorname*{inv}\left(  w^{\curvearrowright i}\right)  =\operatorname*{inv}%
w-1.\tag{8}\label{8}%
\end{equation}
We shall say that $w^{\curvearrowright i}$ is obtained from $w$ by a \emph{single
sorting step} in this case. Often, several different single sorting steps can
be applied to a given word $w$; for example, from the word $w=4231$ (over the
totally ordered set $X=\mathbb{Z}$), we can obtain either $w^{\curvearrowright1}=2431$
or $w^{\curvearrowright3}=4213$ by a single sorting step.

Now, for any word $w=w_{1}w_{2}\cdots w_{n}$, we want to define recursively an
element $J\left(  w\right)  \in A$ as follows:

\begin{enumerate}
\item If $\operatorname*{inv}w=0$ (that is, if $w$ has no inversions), then we
set
$J\left(  w\right)  =0$ in $A$.

\item If $\operatorname*{inv}w>0$, then there exists some integer $1\leq i<n$
satisfying $w_{i}>w_{i+1}$ (since otherwise, we would have $w_{1}\leq
w_{2}\leq\cdots\leq w_{n}$ and thus $\operatorname*{inv}w=0$). We choose such
an $i$ and set
\begin{equation}
J\left(  w\right)  =w_{1}w_{2}\cdots w_{i-1}\left\langle w_{i},w_{i+1}%
\right\rangle w_{i+2}w_{i+3}\cdots w_{n}+J\left(  w^{\curvearrowright i}\right)
,\tag{9}\label{9}%
\end{equation}
where (as we recall) $w^{\curvearrowright i}=\left(  w_{1}w_{2}\cdots w_{i-1}\right)
w_{i+1}w_{i}\left(  w_{i+2}w_{i+3}\cdots w_{n}\right)  $ is the word obtained
from $w$ by a single sorting step swapping the $i$-th and $\left(  i+1\right)
$-st letters.
\end{enumerate}

It is not obvious that this is well-defined, because in the
$\operatorname*{inv}w>0$ case, there might be several different integers
$1\leq i<n$ satisfying $w_{i}>w_{i+1}$, and then the formula \eqref{9} might
perhaps give different results depending on which of these $i$ is chosen. We
shall soon see that all choices of $i$ lead to the same $J\left(  w\right)  $
-- so that $J\left( w\right) $ really is well-defined -- but this must be
proved. What is clear is that there is always at least one candidate for
$J\left(  w\right)  $, because the recursion is well-founded (thanks to \eqref{8}).

We now want to show that all choices of $i$ do lead to the same $J\left(
w\right)  $ in \eqref{9}. For now, let us modify the definition so that the
equation \eqref{9} is used to define $J\left(  w\right)  $ only when $i$ is
the \textbf{smallest} integer $1\leq i<n$ satisfying $w_{i}>w_{i+1}$. This
way, the uniqueness of $J\left(  w\right)  $ is obvious (since the smallest
$i$ is always unique). But we must now prove that \eqref{9} holds for
\textbf{each} integer $1\leq i<n$ satisfying $w_{i}>w_{i+1}$, not just for the smallest.

\begin{proof}
[Proof of \eqref{9}.]We shall prove this by strong induction on
$\operatorname*{inv}w$. The \textit{base case} ($\operatorname*{inv}w=0$) is
obvious, since \eqref{9} does not apply to this case at all. For the
\textit{induction step}, we pick some positive integer $k$, and we assume that
\eqref{9} is proved for all words $w$ with $\operatorname*{inv}w<k$ (and all
$i$ that satisfy $w_{i}>w_{i+1}$, not just for the smallest such $i$). Now we
shall prove the same for all words $w$ with $\operatorname*{inv}w=k$.

Fix such a word $w=w_{1}w_{2}\cdots w_{n}$. Let $j$ be the \textbf{smallest}
integer $i$ with $1\leq i<n$ satisfying $w_{i}>w_{i+1}$. Then, by our (modified)
definition of $J\left(  w\right)  $, we have%
\begin{equation}
J\left(  w\right)  =w_{1}w_{2}\cdots w_{j-1}\left\langle w_{j},w_{j+1}%
\right\rangle w_{j+2}w_{j+3}\cdots w_{n}+J\left(  w^{\curvearrowright j}\right)
.\tag{10}\label{10}%
\end{equation}


Now, pick \textbf{any} integer $1\leq i<n$ satisfying $w_{i}>w_{i+1}$. We must
prove \eqref{9}. If $i=j$, then this follows from \eqref{10}; thus, we assume
that $i\neq j$ from now on. Since $j$ was the smallest $i$ satisfying
$w_{i}>w_{i+1}$, we have $i\geq j$, and thus $i>j$ (since $i\neq j$). Hence,
we are in one of the following two cases:

\textit{Case 1:} We have $i=j+1$.

\textit{Case 2:} We have $i>j+1$.

Let us first consider Case 1. In this case, $i=j+1$. Thus, $w_{i}>w_{i+1}$
(which holds by assumption) rewrites as $w_{j+1}>w_{j+2}$. Moreover, the
definition of $j$ shows that $w_{j}>w_{j+1}$. Therefore, $w_{j}>w_{j+1}%
>w_{j+2}$. We shall use the notations $a:=w_{j}$, $b:=w_{j+1}$, $c:=w_{j+2}$,
$\ell:=w_{1}w_{2}\cdots w_{j-1}$ and $r:=w_{j+3}w_{j+4}\cdots w_{n}$; thus,
$w=\ell abcr$ (since $w=w_{1}w_{2}\cdots w_{n}$) and $a>b>c$ (since
$w_{j}>w_{j+1}>w_{j+2}$).

We have $w_{1}w_{2}\cdots w_{j-1}=\ell$ and $w_{j}=a$ and $w_{j+1}=b$ and
$w_{j+2}w_{j+3}\cdots w_{n}=cr$ and therefore $w^{\curvearrowright j}=\ell bacr$. Thus,
the equality \eqref{10} rewrites as%
\begin{equation}
J\left(  w\right)  =\ell\left\langle a,b\right\rangle cr+J\left(  \ell
bacr\right)  .\tag{11}\label{11}%
\end{equation}
Moreover, the word $\ell bacr$ is obtained from $\ell abcr=w$ by a single
sorting step (since $a>b$); thus, \eqref{8} yields $\operatorname*{inv}\left(
\ell bacr\right)  =\operatorname*{inv}w-1<\operatorname*{inv}w=k$. Hence, by
the induction hypothesis, \eqref{9} holds for $\ell bacr$ instead of $w$. In
particular, we can apply \eqref{9} to $\ell bacr$ and $j+1$ instead of $w$ and
$i$ (since the $\left(  j+1\right)  $-st and $\left(  j+2\right)  $-nd letters
of $\ell bacr$ are $a$ and $c$, and these satisfy $a>c$), and thus we obtain%
\begin{equation}
J\left(  \ell bacr\right)  =\ell b\left\langle a,c\right\rangle r+J\left(
\ell bcar\right)  .\tag{12}\label{12}%
\end{equation}
Furthermore, the word $\ell bcar$ is obtained from $\ell bacr$ by a single
sorting step (since $a>c$); thus, \eqref{8} yields $\operatorname*{inv}\left(
\ell bcar\right)  =\operatorname*{inv}\left(  \ell bacr\right)
-1<\operatorname*{inv}\left(  \ell bacr\right)  <k$. Hence, by the induction
hypothesis, \eqref{9} holds for $\ell bcar$ instead of $w$. In particular, we
can apply \eqref{9} to $\ell bcar$ and $j$ instead of $w$ and $i$ (since the
$j$-th and $\left(  j+1\right)  $-st letters of $\ell bcar$ are $b$ and $c$,
and these satisfy $b>c$), and thus we obtain%
\begin{equation}
J\left(  \ell bcar\right)  =\ell\left\langle b,c\right\rangle ar+J\left(  \ell
cbar\right)  .\tag{13}\label{13}%
\end{equation}


Now, \eqref{11} becomes%
\begin{align}
J\left(  w\right)   &  =\ell\left\langle a,b\right\rangle cr+J\left(  \ell
bacr\right)  =\ell\left\langle a,b\right\rangle cr+\ell b\left\langle
a,c\right\rangle r+J\left(  \ell bcar\right)  \qquad\left(  \text{by
\eqref{12}}\right) \nonumber\\
&  =\ell\left\langle a,b\right\rangle cr+\ell b\left\langle a,c\right\rangle
r+\ell\left\langle b,c\right\rangle ar+J\left(  \ell cbar\right)
\qquad\left(  \text{by \eqref{13}}\right) \nonumber\\
&  =\ell\left(  \left\langle a,b\right\rangle c+b\left\langle a,c\right\rangle
+\left\langle b,c\right\rangle a\right)  r+J\left(  \ell cbar\right)  .
\tag{14}\label{14}%
\end{align}


On the other hand, our goal is to prove \eqref{9}. In other words, our goal is
to prove%
\begin{equation}
J\left(  w\right)  =\ell a\left\langle b,c\right\rangle r+J\left(  \ell
acbr\right)  \tag{15}\label{15}%
\end{equation}
(since $i=j+1$, and thus $w_{1}w_{2}\cdots w_{i-1}=w_{1}w_{2}\cdots w_{j}=\ell
a$ and $w_{i}=w_{j+1}=b$ and $w_{i+1}=w_{j+2}=c$ and $w_{i+2}w_{i+3}\cdots
w_{n}=w_{j+3}w_{j+4}\cdots w_{n}=r$ and $w^{\curvearrowright i}=\ell acbr$). Let us
rewrite the right hand side.

The word $\ell acbr$ is obtained from $\ell abcr=w$ by a single sorting step
(since $b>c$); thus, \eqref{8} yields $\operatorname*{inv}\left(  \ell
acbr\right)  =\operatorname*{inv}w-1<\operatorname*{inv}w=k$. Hence, by the
induction hypothesis, \eqref{9} holds for $\ell acbr$ instead of $w$. In
particular, we can apply \eqref{9} to $\ell acbr$ and $j$ instead of $w$ and
$i$ (since the $j$-th and $\left(  j+1\right)  $-st letters of $\ell acbr$ are
$a$ and $c$, and these satisfy $a>c$), and thus we obtain%
\begin{equation}
J\left(  \ell acbr\right)  =\ell\left\langle a,c\right\rangle br+J\left(  \ell
cabr\right)  . \tag{16}\label{16}%
\end{equation}


The word $\ell cabr$ is obtained from $\ell acbr$ by a single sorting step
(since $a>c$); thus, \eqref{8} yields $\operatorname*{inv}\left(  \ell
cabr\right)  =\operatorname*{inv}\left(  \ell acbr\right)
-1<\operatorname*{inv}\left(  \ell acbr\right)  <k$. Hence, by the induction
hypothesis, \eqref{9} holds for $\ell cabr$ instead of $w$. In particular, we
can apply \eqref{9} to $\ell cabr$ and $j+1$ instead of $w$ and $i$ (since the
$\left(  j+1\right)  $-st and $\left(  j+2\right)  $-nd letters of $\ell cabr$
are $a$ and $b$, and these satisfy $a>b$), and thus we obtain%
\begin{equation}
J\left(  \ell cabr\right)  =\ell c\left\langle a,b\right\rangle r+J\left(
\ell cbar\right)  .\nonumber
\end{equation}
Substituting this into \eqref{16}, we find%
\begin{equation}
J\left(  \ell acbr\right)  =\ell\left\langle a,c\right\rangle br+\ell
c\left\langle a,b\right\rangle r+J\left(  \ell cbar\right)  . \tag{17}%
\label{17}%
\end{equation}


However, the axiom (L3) of the Lie structure $A$ yields
\[
\left(  \left\langle a,b\right\rangle c-c\left\langle a,b\right\rangle
\right)  +\left(  \left\langle b,c\right\rangle a-a\left\langle
b,c\right\rangle \right)  +\left(  \left\langle c,a\right\rangle
b-b\left\langle c,a\right\rangle \right)  =0.
\]
In view of $\left\langle c,a\right\rangle =-\left\langle a,c\right\rangle $
(this is an easy consequence of axiom (L1), just as for Lie algebras), we can
rewrite this as
\[
\left(  \left\langle a,b\right\rangle c-c\left\langle a,b\right\rangle
\right)  +\left(  \left\langle b,c\right\rangle a-a\left\langle
b,c\right\rangle \right)  +\left(  \left(  -\left\langle a,c\right\rangle
\right)  b-b\left(  -\left\langle a,c\right\rangle \right)  \right)  =0.
\]
Expanding the parentheses and collecting the negative terms on the right hand
side, we transform this into%
\begin{equation}
\left\langle a,b\right\rangle c+\left\langle b,c\right\rangle a+b\left\langle
a,c\right\rangle =c\left\langle a,b\right\rangle +a\left\langle
b,c\right\rangle +\left\langle a,c\right\rangle b. \tag{18}\label{18}%
\end{equation}
Now, \eqref{14} becomes%
\begin{align*}
J\left(  w\right)   &  =\ell\underbrace{\left(  \left\langle a,b\right\rangle
c+b\left\langle a,c\right\rangle +\left\langle b,c\right\rangle a\right)
}_{\substack{=\left\langle a,b\right\rangle c+\left\langle b,c\right\rangle
a+b\left\langle a,c\right\rangle \\=c\left\langle a,b\right\rangle
+a\left\langle b,c\right\rangle +\left\langle a,c\right\rangle b\\\text{(by
\eqref{18})}}}r+J\left(  \ell cbar\right) \\
&  =\ell\left(  c\left\langle a,b\right\rangle +a\left\langle b,c\right\rangle
+\left\langle a,c\right\rangle b\right)  r+J\left(  \ell cbar\right) \\
&  =\ell c\left\langle a,b\right\rangle r+\ell a\left\langle b,c\right\rangle
r+\ell\left\langle a,c\right\rangle br+J\left(  \ell cbar\right) \\
&  =\ell a\left\langle b,c\right\rangle r+\underbrace{\ell\left\langle
a,c\right\rangle br+\ell c\left\langle a,b\right\rangle r+J\left(  \ell
cbar\right)  }_{\substack{=J\left(  \ell acbr\right)  \\\text{(by
\eqref{17})}}}\\
&  =\ell a\left\langle b,c\right\rangle r+J\left(  \ell acbr\right)  .
\end{align*}
This proves \eqref{15}. In other words, \eqref{9} is proved for our arbitrary
$i$ (since \eqref{15} is just a rewritten form of \eqref{9}). This completes
the induction step in Case 1.

Let us now consider Case 2. In this case, $i>j+1$. Thus, the four integers
$j,j+1,i,i+1$ are distinct and ordered as follows: $j<j+1<i<i+1$. We shall use
the notations $a:=w_{j}$, $b:=w_{j+1}$, $c:=w_{i}$, $d:=w_{i+1}$, $\ell
:=w_{1}w_{2}\cdots w_{j-1}$, $m:=w_{j+2}w_{j+3}\cdots w_{i-1}$ and
$r:=w_{i+2}w_{i+3}\cdots w_{n}$; thus, $w=\ell abmcdr$ (since $w=w_{1}%
w_{2}\cdots w_{n}$ and $j<j+1<i<i+1$) and $a>b$ (since $w_{j}>w_{j+1}$) and
$c>d$ (since $w_{i}>w_{i+1}$).

We have $w_{1}w_{2}\cdots w_{j-1}=\ell$ and $w_{j}=a$ and $w_{j+1}=b$ and
$w_{j+2}w_{j+3}\cdots w_{n}=mcdr$ and thus $w^{\curvearrowright j}=\ell bamcdr$. Thus,
the equality \eqref{10} rewrites as%
\begin{equation}
J\left(  w\right)  =\ell\left\langle a,b\right\rangle mcdr+J\left(  \ell
bamcdr\right)  .\tag{19}\label{19}%
\end{equation}
Moreover, the word $\ell bamcdr$ is obtained from $\ell abmcdr=w$ by a single
sorting step (since $a>b$); thus, \eqref{8} yields $\operatorname*{inv}\left(
\ell bamcdr\right)  =\operatorname*{inv}w-1<\operatorname*{inv}w=k$. Hence, by
the induction hypothesis, \eqref{9} holds for $\ell bamcdr$ instead of $w$. In
particular, we can apply \eqref{9} to $\ell bamcdr$ and $i$ instead of $w$ and
$i$ (since the $i$-th and $\left(  i+1\right)  $-st letters of $\ell bamcdr$
are $c$ and $d$, and these satisfy $c>d$), and thus we obtain%
\[
J\left(  \ell bamcdr\right)  =\ell bam\left\langle c,d\right\rangle r+J\left(
\ell bamdcr\right)  .
\]
Substituting this into \eqref{19}, we find%
\begin{align}
J\left(  w\right)   &  =\ell\left\langle a,b\right\rangle mcdr+\ell
bam\left\langle c,d\right\rangle r+J\left(  \ell bamdcr\right)  \nonumber\\
&  =\ell\left(  \left\langle a,b\right\rangle mcd+bam\left\langle
c,d\right\rangle \right)  r+J\left(  \ell bamdcr\right)  .\tag{20}\label{20}%
\end{align}


On the other hand, our goal is to prove \eqref{9}. In other words, our goal is
to prove%
\begin{equation}
J\left(  w\right)  =\ell abm\left\langle c,d\right\rangle r+J\left(  \ell
abmdcr\right)  \tag{21}\label{21}%
\end{equation}
(since $w_{1}w_{2}\cdots w_{i-1}=\ell abm$ and $w_{i}=c$ and $w_{i+1}=d$ and
$w_{i+2}w_{i+3}\cdots w_{n}=r$ and thus $w^{\curvearrowright i}=\ell abmdcr$). Let us
rewrite the right hand side.

The word $\ell abmdcr$ is obtained from $\ell abmcdr=w$ by a single sorting
step (since $c>d$); thus, \eqref{8} yields $\operatorname*{inv}\left(  \ell
abmdcr\right)  =\operatorname*{inv}w-1<\operatorname*{inv}w=k$. Hence, by the
induction hypothesis, \eqref{9} holds for $\ell abmdcr$ instead of $w$. In
particular, we can apply \eqref{9} to $\ell abmdcr$ and $j$ instead of $w$ and
$i$ (since the $j$-th and $\left(  j+1\right)  $-st letters of $\ell abmdcr$ are
$a$ and $b$, and these satisfy $a>b$), and thus we obtain%
\begin{equation}
J\left(  \ell abmdcr\right)  =\ell\left\langle a,b\right\rangle mdcr+J\left(
\ell bamdcr\right)  . \tag{22}\label{22}%
\end{equation}


However, the axiom (L2) of the Lie structure $A$ yields
\[
\left\langle a,b\right\rangle m\left(  cd-dc\right)  =\left(  ab-ba\right)
m\left\langle c,d\right\rangle .
\]
Expanding both sides, we rewrite this as%
\[
\left\langle a,b\right\rangle mcd-\left\langle a,b\right\rangle
mdc=abm\left\langle c,d\right\rangle -bam\left\langle c,d\right\rangle .
\]
Equivalently,%
\[
\left\langle a,b\right\rangle mcd+bam\left\langle c,d\right\rangle
=abm\left\langle c,d\right\rangle +\left\langle a,b\right\rangle mdc.
\]
Substituting this into \eqref{20}, we obtain%
\begin{align*}
J\left(  w\right)   &  =\ell\left(  abm\left\langle c,d\right\rangle
+\left\langle a,b\right\rangle mdc\right)  r+J\left(  \ell bamdcr\right) \\
&  =\ell abm\left\langle c,d\right\rangle r+\underbrace{\ell\left\langle
a,b\right\rangle mdcr+J\left(  \ell bamdcr\right)  }_{\substack{=J\left(  \ell
abmdcr\right)  \\\text{(by \eqref{22})}}}\\
&  =\ell abm\left\langle c,d\right\rangle r+J\left(  \ell abmdcr\right)  .
\end{align*}
This proves \eqref{21}. In other words, \eqref{9} is proved for our arbitrary
$i$ (since \eqref{21} is just a rewritten form of \eqref{9}). This completes
the induction step in Case 2.

We have now completed the induction step in both Cases 1 and 2. Thus, the
induction proof of \eqref{9} is complete.
\end{proof}

Thus, for each word $w=w_{1}w_{2}\cdots w_{n}$, we have defined an element
$J\left(  w\right)  \in A$ that satisfies \eqref{9} for each $1\leq i<n$
satisfying $w_{i}>w_{i+1}$ (not just for the smallest such $i$).

Next, let us define some general terminology. A word is said to be
\emph{sorted} if its letters are weakly increasing from left to right (i.e.,
if it has no inversions). For each word $w$, there is exactly one sorted word
that contains the same letters (with the same multiplicities) as $w$; this
word is called the \emph{sorted rearrangement} of $w$, and can be obtained
from $w$ by successively applying single sorting steps until no more
inversions remain. We shall use the notation $\operatorname*{sort}w$ for this
word. Note that
\begin{equation}
J\left(  w\right)  =0\qquad\text{for any sorted word }w \tag{23}\label{23}%
\end{equation}
(by the base case of the recursive definition of $J\left(  w\right)  $,
since $\operatorname*{inv}w=0$).

%Furthermore, for any word $w$, the projection of $w$ onto $S\left(
%M\right)  $ is identical with the projection of $\operatorname*{sort}w$ onto
%$S\left(  M\right)  $ (since $\operatorname*{sort}w$ is obtained from $w$ by
%rearranging the letters, but this rearrangement does not affect the projection
%because $S\left(  M\right)  $ is commutative).


Furthermore, for any word $w$ (or, more generally, any element $w$ of
$T\left(  M\right)  $), we shall let $\overline{w}$ denote the projection of
$w$ onto $S\left(  M\right)  $.

We shall now prove the following general formula: If $v$ is a word and $w$ is
a sorted word, and $x\in X$ is a letter, then%
\begin{equation}
\left(  x\circ\overline{w}\right)  v=J\left(  xwv\right)  -J\left(
\operatorname*{sort}\left(  xw\right)  v\right)  . \tag{24}\label{24}%
\end{equation}


\begin{proof}
[Proof of \eqref{24}.]Indeed, let $v$ be a word, let $w$ be a sorted word, and
let $x\in X$ be a letter. Note that $\overline{\operatorname*{sort}\left(
xw\right)  }=\overline{xw}$ (since the words $\operatorname*{sort}\left(
xw\right)  $ and $xw$ differ only in the order of their letters, and thus have
the same projection onto the commutative algebra $S\left(  M\right)  $).

Write the sorted word $w$ as $w=t_{1}t_{2}\cdots t_{n}$. Thus, $\overline
{w}=\overline{t_{1}t_{2}\cdots t_{n}}
= \tau_{1}\tau_{2}\cdots\tau_{n}$, where each $\tau_{i}$ is the image of
$t_{i}$ in $S\left(  M\right)  $.

Since the word $t_{1}t_{2}\cdots t_{n}$ is sorted, we have $t_{1}\leq
t_{2}\leq\cdots\leq t_{n}$. Thus, we have%
\begin{equation}
t_{1}\leq t_{2}\leq\cdots\leq t_{m-1}<x\leq t_{m}\leq t_{m+1}\leq\cdots\leq
t_{n} \tag{25}\label{25}%
\end{equation}
for a unique integer $1\leq m\leq n+1$ (in particular, $m=n+1$ if $t_{n}<x$,
whereas $m=1$ if $x\leq t_{1}$). Consider this $m$. Thus, $t_{1}t_{2}\cdots
t_{m-1}xt_{m}t_{m+1}\cdots t_{n}$ is a sorted word (by \eqref{25}). The
definition of $x\circ\overline{w}$ yields%
\begin{align}
x\circ\overline{w}  &  =\sum_{t_{i}<x}t_{1}t_{2}\cdots t_{i-1}\left\langle
x,t_{i}\right\rangle t_{i+1}t_{i+2}\cdots t_{n}\ \ \ \ \ \ \ \ \ \ \left(
\text{since }\overline{w}=\tau_{1}\tau_{2}\cdots\tau_{n}\right) \nonumber\\
&  =\sum_{i=1}^{m-1}t_{1}t_{2}\cdots t_{i-1}\left\langle x,t_{i}\right\rangle
t_{i+1}t_{i+2}\cdots t_{n} \tag{26}\label{26}%
\end{align}
(since the integers $i\in\left\{  1,2,\ldots,n\right\}  $ satisfying $t_{i}<x$
are precisely $1,2,\ldots,m-1$, according to \eqref{25}).

Moreover, from $w=t_{1}t_{2}\cdots t_{n}$, we obtain%
\begin{equation}
\operatorname*{sort}\left(  xw\right)  =\operatorname*{sort}\left(
xt_{1}t_{2}\cdots t_{n}\right)  =t_{1}t_{2}\cdots t_{m-1}xt_{m}t_{m+1}\cdots
t_{n} \tag{27}\label{27}%
\end{equation}
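(by \eqref{25}). Furthermore, the word $xwv$ can be transformed into the word
$\operatorname*{sort}\left(  xw\right)  v$ by the following sequence of single
sorting steps (each swap is indeed a sorting step, since \eqref{25} yields
$x>t_{i}$ for each $i\in\left\{  1,2,\ldots,m-1\right\}  $):%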
\begin{align*}
xwv  &  =xt_{1}t_{2}\cdots t_{n}v\ \ \ \ \ \ \ \ \ \ \left(  \text{since
}w=t_{1}t_{2}\cdots t_{n}\right) \\
&  \mapsto t_{1}xt_{2}t_{3}\cdots t_{n}v\ \ \ \ \ \ \ \ \ \left(  \text{we
swapped }x\text{ with }t_{1}\right) \\
&  \mapsto t_{1}t_{2}xt_{3}t_{4}\cdots t_{n}v\ \ \ \ \ \ \ \ \ \left(
\text{we swapped }x\text{ with }t_{2}\right) \\
&  \mapsto\cdots\\
&  \mapsto\underbrace{t_{1}t_{2}\cdots t_{m-1}xt_{m}t_{m+1}\cdots t_{n}%
}_{=\operatorname*{sort}\left(  xw\right)  }v\ \ \ \ \ \ \ \ \ \left(
\text{we swapped }x\text{ with }t_{m-1}\right) \\
&  =\operatorname*{sort}\left(  xw\right)  v.
\end{align*}
Using the recursive definition \eqref{9} of the $J$-function, we thus have%
\begin{align*}
J\left(  xt_{1}t_{2}\cdots t_{n}v\right)   &  =\left\langle x,t_{1}%
\right\rangle t_{2}t_{3}\cdots t_{n}v+J\left(  t_{1}xt_{2}t_{3}\cdots
t_{n}v\right)  ;\\
J\left(  t_{1}xt_{2}t_{3}\cdots t_{n}v\right)   &  =t_{1}\left\langle
x,t_{2}\right\rangle t_{3}t_{4}\cdots t_{n}v+J\left(  t_{1}t_{2}xt_{3}%
t_{4}\cdots t_{n}v\right)  ;\\
&  \ldots;\\
J\left(  t_{1}t_{2}\cdots t_{m-2}xt_{m-1}t_{m}\cdots t_{n}v\right)   &
=t_{1}t_{2}\cdots t_{m-2}\left\langle x,t_{m-1}\right\rangle t_{m}%
t_{m+1}\cdots t_{n}v \\
& \qquad \qquad +J\left(  t_{1}t_{2}\cdots t_{m-1}xt_{m}t_{m+1}\cdots
t_{n}v\right)  .
\end{align*}
Substituting these equalities into one another, we obtain%
\begin{align}
&  J\left(  xt_{1}t_{2}\cdots t_{n}v\right) \nonumber\\
&  =\underbrace{\left\langle x,t_{1}\right\rangle t_{2}t_{3}\cdots
t_{n}v+t_{1}\left\langle x,t_{2}\right\rangle t_{3}t_{4}\cdots t_{n}%
v+\cdots+t_{1}t_{2}\cdots t_{m-2}\left\langle x,t_{m-1}\right\rangle
t_{m}t_{m+1}\cdots t_{n}v}_{=\sum\limits_{i=1}^{m-1}t_{1}t_{2}\cdots t_{i-1}%
\left\langle x,t_{i}\right\rangle t_{i+1}t_{i+2}\cdots t_{n}v}\nonumber\\
&  \ \ \ \ \ \ \ \ \ \ +J\left(  \underbrace{t_{1}t_{2}\cdots t_{m-1}%
xt_{m}t_{m+1}\cdots t_{n}}_{=\operatorname*{sort}\left(  xw\right)  }v\right)
\nonumber\\
&  =\underbrace{\sum_{i=1}^{m-1}t_{1}t_{2}\cdots t_{i-1}\left\langle
x,t_{i}\right\rangle t_{i+1}t_{i+2}\cdots t_{n}}_{\substack{=x\circ
\overline{w}\\\text{(by \eqref{26})}}}v+J\left(  \operatorname*{sort}\left(
xw\right)  v\right) \nonumber\\
&  =\left(  x\circ\overline{w}\right)  v+J\left(  \operatorname*{sort}\left(
xw\right)  v\right)  . \tag{28}\label{28}%
\end{align}
Since $t_{1}t_{2}\cdots t_{n}=w$, this rewrites as $J\left(  xwv\right)
=\left(  x\circ\overline{w}\right)  v+J\left(  \operatorname*{sort}\left(
xw\right)  v\right)  $. This proves \eqref{24}.
\end{proof}

Similarly to \eqref{24}, we can prove that if $u$ is a word and $w$ is a
sorted word, and $y\in X$ is a letter, then%
\begin{equation}
u\left(  \overline{w}\circ y\right)  =J\left(  uwy\right)  -J\left(
u\operatorname*{sort}\left(  wy\right)  \right)  . \tag{29}\label{29}%
\end{equation}


Now, fix $x,y\in X$ and $x_{1}\leq x_{2}\leq\cdots\leq x_{n}$ in $X$. Let
$\xi$, $\eta$ and $\xi_{i}$ be the images of $x$, $y$ and $x_{i}$ in
$S\left(  M\right)  $. Let $\sigma=\xi_{1}\xi_{2}\cdots\xi_{n}$. We now take
aim at proving the two equalities%
\begin{align}
\left(  \xi\sigma\right)  \circ y+\left(  x\circ\sigma\right)  y  &  =J\left(
xx_{1}x_{2}\cdots x_{n}y\right)  \ \ \ \ \ \ \ \ \ \ \text{and} \tag{32}%
\label{32}\\
x\circ\left(  \sigma\eta\right)  +x\left(  \sigma\circ y\right)   &  =J\left(
xx_{1}x_{2}\cdots x_{n}y\right)  . \tag{33}\label{33}%
\end{align}
Once these two equalities are proved, it will immediately follow that%
\[
\left(  \xi\sigma\right)  \circ y+\left(  x\circ\sigma\right)  y=J\left(
xx_{1}x_{2}\cdots x_{n}y\right)  =x\circ\left(  \sigma\eta\right)  +x\left(
\sigma\circ y\right)  ,
\]
thus%
\[
\left(  \xi\sigma\right)  \circ y-x\left(  \sigma\circ y\right)
=x\circ\left(  \sigma\eta\right)  -\left(  x\circ\sigma\right)  y,
\]
and thus \eqref{3} will be proved. Thus, it remains to prove
\eqref{32} and \eqref{33}.

The word $x_{1}x_{2}\cdots x_{n}$ is sorted (since $x_{1}\leq x_{2}\leq
\cdots\leq x_{n}$). Hence, applying \eqref{24} to $v=y$ and $w=x_{1}%
x_{2}\cdots x_{n}$, we find%
\begin{equation}
\left(  x\circ\overline{x_{1}x_{2}\cdots x_{n}}\right)  y=J\left(  xx_{1}%
x_{2}\cdots x_{n}y\right)  -J\left(  \operatorname*{sort}\left(  xx_{1}%
x_{2}\cdots x_{n}\right)  y\right)  .\nonumber
\end{equation}
Thus,%
\begin{align}
J\left(  xx_{1}x_{2}\cdots x_{n}y\right)   &  =J\left(  \operatorname*{sort}%
\left(  xx_{1}x_{2}\cdots x_{n}\right)  y\right)  +\left(  x\circ
\overline{x_{1}x_{2}\cdots x_{n}}\right)  y\nonumber\\
&  =J\left(  \operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)
y\right)  +\left(  x\circ\sigma\right)  y \tag{34}\label{34}%
\end{align}
(since $\overline{x_{1}x_{2}\cdots x_{n}}=\xi_{1}\xi_{2}\cdots\xi_{n}=\sigma
$). But $\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  $ is also
a sorted word. Thus, applying \eqref{29} to $u=1$ and $w=\operatorname*{sort}%
\left(  xx_{1}x_{2}\cdots x_{n}\right)  $, we find%
\begin{equation}
1\left(  \overline{\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)
}\circ y\right)  =J\left(  1\operatorname*{sort}\left(  xx_{1}x_{2}\cdots
x_{n}\right)  y\right)  -J\left(  1\operatorname*{sort}\left(
\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  y\right)  \right)
.\nonumber
\end{equation}
This simplifies to%
\begin{align*}
\overline{\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  }\circ
y  &  =J\left(  \operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)
y\right)  -\underbrace{J\left(  \operatorname*{sort}\left(
\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  y\right)  \right)
}_{\substack{=0\\\text{(by \eqref{23}, since }\operatorname*{sort}\left(
\operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  y\right)
\\\text{is a sorted word)}}}\\
&  =J\left(  \operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)
y\right)  .
\end{align*}
Thus,%
\begin{equation}
J\left(  \operatorname*{sort}\left(  xx_{1}x_{2}\cdots x_{n}\right)  y\right)
=\underbrace{\overline{\operatorname*{sort}\left(  xx_{1}x_{2}\cdots
x_{n}\right)  }}_{=\overline{xx_{1}x_{2}\cdots x_{n}}=\xi\xi_{1}\xi_{2}%
\cdots\xi_{n}=\xi\sigma}\circ\,y=\left(  \xi\sigma\right)  \circ y.\nonumber
\end{equation}
Substituting this into \eqref{34}, we obtain%
\[
J\left(  xx_{1}x_{2}\cdots x_{n}y\right)  =\left(  \xi\sigma\right)  \circ
y+\left(  x\circ\sigma\right)  y.
\]
This proves \eqref{32}. A similar argument (in which we first apply \eqref{29}
to $u=x$ and $w=x_{1}x_{2}\cdots x_{n}$, and then apply \eqref{24} to $v=1$
and $w=\operatorname*{sort}\left(  x_{1}x_{2}\cdots x_{n}y\right)  $) proves
\eqref{33}. Hence, as we said above, \eqref{3} follows.

\end{fineprint}

\section*{Appendix 2 (by DG). Birkhoff--Witt for flat modules}

\begin{fineprint}

Theorem 7 (i) can be generalized as follows:

\begin{theorem}
Let $R$ be a fixed commutative ring, and $M$ be a flat $R$-module.
Then, $B(M) = 0$.
\end{theorem}

\begin{proof}
The Govorov--Lazard theorem (\cite[Th\'eor\`eme 1.2 (iii)]{lazard2},
\cite{hines}) shows that $M$ is a direct limit of a directed system
$\{M_\alpha\}$ of finitely generated free $R$-modules. By Theorem~7~(i),
all these free $R$-modules $M_\alpha$ satisfy $B(M_\alpha) = 0$. Hence, Theorem~8
yields $B(M) = \varinjlim B(M_\alpha) = \varinjlim 0 = 0$.
\end{proof}

\end{fineprint}

\begin{thebibliography}{99}

\bibitem{birkhoff}
\href{https://doi.org/10.2307/1968569}{\textsc{Birkhoff, G.} Representability of Lie algebras and Lie groups by matrices. \textit{Annals Math.}~\textbf{38} (1937), 526--532.}

\bibitem{cartier}
\href{https://eudml.org/doc/83207}{\textsc{Cartier, P.} Remarques sur le th\'eor\`eme de Birkhoff--Witt. \textit{Ann.\ Scuola norm.\ sup.\ Pisa, Sci.\ fis.\ mat.}~3 Ser.~\textbf{12} (1958), 1--4.}

\bibitem{cohn}
\href{https://doi.org/10.1112/jlms/s1-38.1.197}{\textsc{Cohn, P. M.} A remark on the Birkhoff--Witt theorem. \textit{J.~London Math.\ Soc.}~\textbf{38} (1963), 197--203.}

\bibitem{frohlich}
\href{https://doi.org/10.1090/S0002-9947-1963-0158920-3}{\textsc{Fr\"ohlich, A.} Baer-invariants of algebras. \textit{Trans.\ Am.\ Math.\ Soc.}~\textbf{109} (1963), 221--244.}

\bibitem{lazard}
\textsc{Lazard, M.} Sur les alg\`ebres enveloppantes universelles de certaines alg\`ebres de Lie. \textit{Publ.\ Sci.\ Univ.\ Alger}, S\'er.~A.~\textbf{1} (1954), 281--294.

\bibitem{nagata}
\textsc{Nagata, M.} ``{Local rings}'', (Interscience, New York, 1962).

\bibitem{witt}
\href{https://eudml.org/doc/150011}{\textsc{Witt, E.} Treue Darstellung Liescher Ringe. \textit{J.~reine angew.\ Math.}~\textbf{177} (1937), 152--160.} \footnote{\textit{Comment by DG:} Some titles corrected.}

\noindent\textbf{Additional references inserted by DG:}

\bibitem{baadernipkow}
\href{https://doi.org/10.1017/CBO9781139172752}{\textsc{Baader, F. and Nipkow, T.} ``Term Rewriting and All That'', (Cambridge University Press, Cambridge, 1998).}

% \bibitem{bergman}
% \href{https://doi.org/10.1016/0001-8708(78)90010-5}{\textsc{Bergman, G. M.} The diamond lemma for ring theory. \textit{Advances in Mathematics}~\textbf{29} (1978), 178--218.}

% \bibitem{bjornerbrenti}
% \textsc{Bj\"orner, A. and Brenti, F.} \textit{Combinatorics of Coxeter Groups}. Graduate Texts in Mathematics~\textbf{231}. Springer, 2005.

\bibitem{hines}
\textsc{Hines, R.} Lazard's theorem (characterizing flatness), notes, December 7, 2016.
\url{https://math.colorado.edu/~rohi1040/expository/lazardstheorem.pdf}

\bibitem{lazard2}
\textsc{Lazard, D.} Autour de la platitude. \textit{Bulletin de la Soci\'et\'e Math\'ematique de France}~\textbf{97} (1969), 81--128.

\bibitem{newman}
\href{https://www.jstor.org/stable/1968867}{\textsc{Newman, M. H. A.} On theories with a combinatorial definition of ``equivalence''. \textit{Annals of Mathematics}, Second Series, \textbf{43} (1942), 223--243.}

% \bibitem{llpt}
% \href{https://www.math.ku.dk/bibliotek/arkivet/noter/sympol.pdf}{\textsc{Laksov, D. and Lascoux, A. and Pragacz, P. and Thorup, A.} The LLPT Notes, 2018.}

\end{thebibliography}

\bigskip

\noindent\small \copyright\ 1969 by Academic Press, Inc.

\end{document}
