Appendix A Proofs
Proof of Lemma 1
$$\begin{aligned} \textrm{E}(\widetilde{\varvec{D}}_{x}|\mathcal {F}_{n}) =&\textrm{E}\bigg \{\frac{1}{n}(\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}}w^*_i\varvec{X}_{i}^{*}\varvec{X}_{i}^{*\top } - \varvec{S}_\lambda )|\mathcal {F}_{n}\bigg \} \\ =&\frac{1}{n}(\sum _{i=1}^{n}w_i\varvec{X}_{i}\varvec{X}_{i}^{\top } - \varvec{S}_\lambda )\\ =&\varvec{D}_{x}. \end{aligned}$$
For any integers \(j_1, j_2 \in [1,p]\), let \(\widetilde{\varvec{D}}^{j_1,j_2}_x = \frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}x_{ij_{1}}^{*}x_{ij_{2}}^{*}-\varvec{S}_\lambda ^{j_{1}j_{2}}\bigg \}\) be the component of \(\widetilde{\varvec{D}}_x\). We have
$$\begin{aligned}&\textrm{Var}(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}|\mathcal {F}_{n}) \\&=\textrm{Var}\bigg \{\frac{1}{n}\bigg (\frac{1}{r}\sum _{i=1}^{r}w^*_i\frac{x_{ij_{1}}^{*}x_{ij_{2}}^{*}}{\pi _{i}^{*}} - \varvec{S}^{j_{1}j_{2}}_\lambda \bigg )|\mathcal {F}_{n}\bigg \}\\&=\textrm{Var}\bigg \{\frac{1}{nr}\sum _{i=1}^{r}w^*_i\frac{x_{ij_{1}}^{*}x_{ij_{2}}^{*}}{\pi _{i}^{*}}|\mathcal {F}_{n}\bigg \} \\&=\frac{1}{r}\sum _{i=1}^{n}\pi _{i}\bigg (w_i\frac{x_{ij_{1}}x_{ij_{2}}}{n\pi _{i}}-\frac{1}{rn}\sum _{k=1}^{n}w_kx_{kj_{1}}x_{kj_{2}}\bigg )^{2} \\&=\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{(w_ix_{ij_{1}}x_{ij_{2}})^{2}}{\pi _{i}}-\frac{1}{r}\bigg (\frac{1}{rn}\sum _{i=1}^{n}w_ix_{ij_{1}}x_{ij_{2}}\bigg )^{2} \\&\le \frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w^2_i \Vert \varvec{X}_{i}\Vert ^{4}}{\pi _{i}}-\frac{1}{r}\bigg (\frac{1}{rn}\sum _{i=1}^{n}w_ix_{ij_{1}}x_{ij_{2}}\bigg )^{2}. \end{aligned}$$
Since \(w^2_i\) is bounded, we denote its upper bound as \(M_1\), i.e., \(w^2_i \le M_1\). From Assumptions 2 and 4, we have
$$\begin{aligned} \textrm{Var}(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}|\mathcal {F}_{n})&\le \frac{M_{1}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{4}}{\pi _{i}}-\frac{1}{r}\bigg (\frac{1}{rn}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg )^{2}\\&=O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
According to Markov’s Inequality, we have
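$$\begin{aligned}&P\{(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}-\varvec{D}_{x}^{j_{1}j_{2}})^{2}\ge a|\mathcal {F}_{n}\}\le \frac{\textrm{Var}(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}|\mathcal {F}_{n})}{a}\\&\quad \le \frac{1}{a}\bigg \{\frac{M_{1}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{4}}{\pi _{i}} -\frac{1}{r}\bigg (\frac{1}{rn}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg )^{2}\bigg \}\\&\quad =O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$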
Thus, \((\widetilde{\varvec{D}}_{x}-\varvec{D}_{x})^{2}=O_{P|\mathcal {F}_{n}}(r^{-1})\). Moreover,
$$\begin{aligned} \frac{1}{n}\frac{\partial l^{*}({\widehat{\varvec{\beta }}})}{\partial \varvec{\beta }}=\frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}}w_{i}^{*}(y_{i}^{*}-\mu ^*_i)\varvec{X}_{i}^{*} - \varvec{S}_\lambda {\widehat{\varvec{\beta }}}\bigg \}, \end{aligned}$$
(A1)
Thus,
$$\begin{aligned} \textrm{E}\bigg \{\frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \}&=\frac{1}{n}\bigg \{\sum _{i=1}^{n}w_{i}(y_{i}-\mu _i)\varvec{X}_{i} - \varvec{S}_\lambda \widehat{\varvec{\beta }}\bigg \}\\&=\frac{1}{n}\frac{\partial l(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}=0, \\ \textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \}&=\textrm{Var}\bigg [\frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}}w_{i}^{*}(y_{i}^{*}-\mu ^*_i)\varvec{X}_{i}^{*}\\&\qquad - \varvec{S}_\lambda \widehat{\varvec{\beta }}\bigg \}|\mathcal {F}_{n}\bigg ]\\&=\textrm{Var}\bigg \{\frac{1}{nr}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}}w_{i}^{*}\\&\qquad (y_{i}^{*}-\mu ^*_i)\varvec{X}_{i}^{*}|\mathcal {F}_{n}\bigg \}\\&=\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }}{\pi _{i}}\\&\le \frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\Vert \varvec{X}_{i}\Vert ^{2}}{\pi _{i}}. \end{aligned}$$
Let \(|y_{i}-\mu _i|^{2}\le M_{2}\). Given that \(w_{i}^{2}\le M_{1}\), from Assumption 2, we have
$$\begin{aligned}&\textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \}\\&\qquad \le \frac{M_{1}M_{2}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{2}}{\pi _{i}} = O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
From Markov’s Inequality, we have
$$\begin{aligned}&P\bigg [\bigg \{\frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}-\frac{1}{n}\frac{\partial l(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}\bigg \}^{2}\ge a\bigg ]\\&\qquad \le \frac{\textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}\bigg \}}{a} = O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Thus,
$$\begin{aligned} \frac{1}{n}\frac{\partial l^{*}(\widehat{\varvec{\beta }})}{\partial \varvec{\beta }}=O_{P|\mathcal {F}_{n}}(r^{-1/2}). \end{aligned}$$
\(\square \)
Proof of Theorem 1
Denote the first-order derivative of \(l^{*}(\widetilde{\varvec{\beta }})\) by \(\dot{l}^{*}(\widetilde{\varvec{\beta }})\). For any \(0< u, v < 1\), the Taylor expansion of \(\dot{l}^{*}(\widetilde{\varvec{\beta }})\) at \(\widehat{\varvec{\beta }}_{j}\) is
$$\begin{aligned} \dot{l}^{*}(\widetilde{\varvec{\beta }}_{j}) \approx \,&\dot{l}^{*}(\widehat{\varvec{\beta }}_{j})+\frac{\partial \dot{l}^{*}(\widehat{\varvec{\beta }}_{j})}{\partial \varvec{\beta }^{\top }_{j}} (\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j})+R_{j}=0,\nonumber \\ R_{j}=\,&(\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j})^{\top }\nonumber \\&\times \int _{0}^{1}\int _{0}^{1}\frac{\partial ^{2}\dot{l}^{*}\{\widehat{\varvec{\beta }}_{j}+uv(\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j})\}}{\partial \varvec{\beta }_{j}\partial \varvec{\beta }^{\top }_{j}}v\textrm{d}u\textrm{d}v\nonumber \\&\times (\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j}). \end{aligned}$$
(A2)
According to Chapter 4 of Ferguson (1996), \(\Vert \frac{\partial ^{2}\dot{l}^{*}(\varvec{\beta })}{\partial \varvec{\beta }\partial \varvec{\beta }^{\top }}\Vert =0\) is true for \(\forall \varvec{\beta }\). Thus,
$$\begin{aligned}&\bigg \Vert \int _{0}^{1}\int _{0}^{1}\frac{\partial ^{2}\dot{l}^{*}\{\widehat{\varvec{\beta }}_{j}+uv(\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j})\}}{\partial \varvec{\beta }_{j}\partial \varvec{\beta }^{\top }_{j}}v\textrm{d}u\textrm{d}v\bigg \Vert \\&\quad \le \int _{0}^{1}\int _{0}^{1}\bigg \Vert \frac{\partial ^{2}\dot{l}^{*}\{\widehat{\varvec{\beta }}_{j}+uv(\widetilde{\varvec{\beta }}_{j}-\widehat{\varvec{\beta }}_{j})\}}{\partial \varvec{\beta }_{j}\partial \varvec{\beta }^{\top }_{j}}\bigg \Vert v\textrm{d}u\textrm{d}v=0. \end{aligned}$$
By Lemma 1, we have
$$\begin{aligned} \widetilde{\varvec{\beta }}-\widehat{\varvec{\beta }}=-\widetilde{\varvec{D}}_{x}^{-1}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}=O_{P|\mathcal {F}_{n}}\left( r^{-1/2}\right) . \end{aligned}$$
(A3)
\(\square \)
Proof of Theorem 2
Note that
$$\begin{aligned} \frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}&=\frac{1}{r}\sum _{i=1}^{r}\frac{w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}}{n\pi _{i}^{*}} - \frac{1}{n}\varvec{S}_\lambda \widehat{\varvec{\beta }} \\&=\frac{1}{r}\sum _{i=1}^{r}\bigg \{\frac{w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}}{n\pi _{i}^{*}} - \frac{1}{n} \varvec{S}_\lambda \widehat{\varvec{\beta }}\bigg \} \\ &:=\frac{1}{r}\sum _{i=1}^{r}\varvec{\eta }_{i}. \end{aligned}$$
Furthermore, by Lemma 1,
$$\begin{aligned} & \textrm{E}\bigg \{\frac{1}{n}\dot{l}^{*}(\widehat{\varvec{\beta }})\bigg \}=0, \end{aligned}$$
(A4)
$$\begin{aligned} & \textrm{Var}\bigg \{\frac{1}{n}\dot{l}^{*}(\widehat{\varvec{\beta }})\bigg \}=\varvec{V}_{c}=O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
(A5)
Conditional on the \(\sigma \)-field \(\mathcal {F}_{n}\), the \({\varvec{\eta }_{i}}\) are i.i.d., and it follows from Equation (A4) that \(\textrm{E}(\varvec{\eta }_{i}|\mathcal {F}_{n})=0\), \(i=1,2,\ldots ,r\). Thus, one can further derive that \(\textrm{Var}(\varvec{\eta }_{i}|\mathcal {F}_{n})=r\varvec{V}_{c}=O_{P|\mathcal {F}_{n}}(1)\) for \(i=1,2,\ldots ,r\). Moreover, denote the indicator function by \(I(\cdot )\). For any \(\varepsilon >0\), there exists \(\tau >0\) such that
$$\begin{aligned}&\sum _{i=1}^{r}\textrm{E}\bigg \{\Vert r^{-1/2}\varvec{\eta }_{i}\Vert ^{2}I(\Vert r^{-1/2}\varvec{\eta }_{i}\Vert>\varepsilon )|\mathcal {F}_{n}\bigg \}\\&\quad =\sum _{i=1}^{r}\textrm{E}\bigg \{\Vert r^{-1/2}\varvec{\eta }_{i}\Vert ^{2}I(\Vert \varvec{\eta }_{i}\Vert> r^{1/2}\varepsilon )|\mathcal {F}_{n}\bigg \}\\&\quad \le \sum _{i=1}^{r}\textrm{E}\bigg \{\Vert r^{-1/2}\varvec{\eta }_{i}\Vert ^{2}\bigg (\frac{\Vert \varvec{\eta }_{i}\Vert }{r^{1/2}\varepsilon }\bigg )^{\tau }I(\Vert \varvec{\eta }_{i}\Vert > r^{1/2}\varepsilon )|\mathcal {F}_{n}\bigg \}\\&\quad \le \frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}\textrm{E}\bigg (\sum _{i=1}^{r}\Vert \varvec{\eta }_{i}\Vert ^{2+\tau }\bigg )\\&\quad =\frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}\textrm{E}\bigg \{\sum _{i=1}^{r}\bigg \Vert \frac{w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}}{n\pi _{i}^{*}} - \frac{1}{n}\varvec{S}_\lambda \widehat{\varvec{\beta }}\bigg \Vert ^{2+\tau }\bigg \}\\&\quad \le \frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{r}\bigg \{\frac{\Vert w_{i}^{*} (y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}\Vert ^{2+\tau }}{n^{2+\tau }(\pi _{i}^{*})^{2+\tau }}+\frac{\Vert \varvec{S}_\lambda \widehat{\varvec{\beta }}\Vert ^{2+\tau }}{n^{2+\tau }}\bigg \}\\&\quad =\frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{n}\frac{\Vert w_{i}(y_{i}-\mu _i)\varvec{X}_{i}\Vert ^{2+\tau }}{n^{2+\tau }(\pi _{i})^{1+\tau }}\\&\qquad +\frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{r}\frac{\Vert \varvec{S}_\lambda \widehat{\varvec{\beta }}\Vert ^{2+\tau }}{n^{2+\tau }}\\&\quad \le \frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{n}\frac{|w_{i}|^{2+\tau }\Vert \varvec{X}_{i}\Vert ^{2+\tau }\Vert y_{i}-\mu _i\Vert ^{2+\tau }}{n^{2+\tau }\pi _{i}^{1+\tau }}\\&\qquad +\frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\frac{\Vert \varvec{S}_\lambda \Vert ^{2+\tau }\Vert \widehat{\varvec{\beta }}\Vert ^{2+\tau }}{n^{2+\tau }}. \end{aligned}$$
There exist \(M_3, M_4, M_5, M_6 > 0\) such that \(|w_{i}|^{2+\tau }\le M_{3}\), \(|y_{i}-\mu _i|^{2+\tau }\le M_{4}\), \(\Vert \varvec{S}_\lambda \Vert ^{2+\tau }\le M_{5}\), and \(\Vert \widehat{\varvec{\beta }}\Vert ^{2+\tau }\le M_{6}\). Thus,
$$\begin{aligned}&\sum _{i=1}^{r}\textrm{E}\bigg \{\Vert r^{-1/2}\varvec{\eta }_{i}\Vert ^{2}I(\Vert r^{-1/2}\varvec{\eta }_{i}\Vert >\varepsilon )|\mathcal {F}_{n}\bigg \} \\&\quad \le \frac{M_{3}M_{4}}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{2+\tau }}{n^{2+\tau }\pi _{i}^{1+\tau }} +\frac{M_{5}M_{6}}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }n^{2+\tau }}. \end{aligned}$$
From Assumption 3, when \(n\rightarrow \infty \) and \(r\rightarrow \infty \),
$$\begin{aligned}&\sum _{i=1}^{r}\textrm{E}\bigg \{\Vert r^{-1/2}\varvec{\eta }_{i}\Vert ^{2}I(\Vert r^{-1/2}\varvec{\eta }_{i}\Vert >\varepsilon )|\mathcal {F}_{n}\bigg \} =o_{P|\mathcal {F}_{n}}(1). \end{aligned}$$
Thus, by the Lindeberg-Feller central limit theorem,
$$\begin{aligned}&\frac{1}{n}\varvec{V}_{c}^{-1/2}\dot{l}^{*}(\widehat{\varvec{\beta }})\nonumber \\&\quad =r^{-1/2}{\textrm{Var}(\varvec{\eta }_{i}|\mathcal {F}_{n})}^{-1/2}\sum _{i=1}^{r}\varvec{\eta }_{i}\xrightarrow []{d} N(\textbf{0},\varvec{I}). \end{aligned}$$
(A6)
From Lemma 1 and Theorem 1,
$$\begin{aligned} \widetilde{\varvec{D}}_{x}^{-1}-\varvec{D}_{x}^{-1}=-\varvec{D}_{x}^{-1}(\widetilde{\varvec{D}}_{x}-\varvec{D}_{x})\widetilde{\varvec{D}}_{x}^{-1}=O_{P|\mathcal {F}_{n}}(r^{-1/2}). \end{aligned}$$
From Equation (A5), \(r\varvec{V}_{c}=O_{P|\mathcal {F}_{n}}(1)\). We can further derive that
$$\begin{aligned} \varvec{V}=\varvec{D}_{x}^{-1}\varvec{V}_{c}\varvec{D}_{x}^{-1}=O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Thus, by Theorem 1, we have
$$\begin{aligned} \varvec{V}^{-1/2}(\widetilde{\varvec{\beta }}-\widehat{\varvec{\beta }})=O_{P|\mathcal {F}_{n}}(1). \end{aligned}$$
From Equation (A3),
$$\begin{aligned}&\varvec{V}^{-1/2}(\widetilde{\varvec{\beta }}-\widehat{\varvec{\beta }})\\&\quad =-\varvec{V}^{-1/2}\widetilde{\varvec{D}}_{x}^{-1}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}\\&\quad =-\varvec{V}^{-1/2}\bigg \{\varvec{D}_{x}^{-1}+(\widetilde{\varvec{D}}_{x}^{-1}-\varvec{D}_{x}^{-1})\bigg \}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}\\&\quad =-\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}+\\&\quad \quad \bigg \{-\varvec{V}^{-1/2}(\widetilde{\varvec{D}}_{x}^{-1}-\varvec{D}_{x}^{-1})\bigg \}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}\\&\quad =-\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}+O_{P|\mathcal {F}_{n}}(r^{-1/2})\\&\quad =-\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}^{1/2}\varvec{V}_{c}^{-1/2}\frac{\dot{l}^{*}(\widehat{\varvec{\beta }})}{n}+O_{P|\mathcal {F}_{n}}(r^{-1/2}), \end{aligned}$$
Furthermore,
$$\begin{aligned}&\textrm{Var}(\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}^{1/2})\\&\quad =\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}^{1/2} (\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}^{1/2})^{\top }\\&\quad =\varvec{V}^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}^{1/2}\varvec{V}_{c}^{1/2}\varvec{D}_{x}^{-1}\varvec{V}^{-1/2}=\varvec{I}. \end{aligned}$$
Thus,
$$\begin{aligned} \textrm{Var}(\varvec{V}^{-1/2}(\widetilde{\varvec{\beta }}-\widehat{\varvec{\beta }}))=\varvec{I}. \end{aligned}$$
By Slutsky's theorem and Eq. (A6),
$$\begin{aligned} \varvec{V}^{-1/2}(\widetilde{\varvec{\beta }}-\widehat{\varvec{\beta }})\xrightarrow []{d} N(\textbf{0},\varvec{I}). \end{aligned}$$
\(\square \)
Proof of Theorem 3
From Theorem 2,
$$\begin{aligned} \mathrm{{tr}}(\varvec{V})&=\mathrm{{tr}}(\varvec{D}_{x}^{-1}\varvec{V}_{c}\varvec{D}_{x}^{-1})\\&=\mathrm{{tr}}\bigg \{\varvec{D}_{x}^{-1}\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }}{\pi _{i}}\varvec{D}_{x}^{-1}\bigg \}\\&=\frac{1}{rn^{2}}\sum _{i=1}^{n}\mathrm{{tr}}\bigg \{\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}}{\pi _{i}}\varvec{D}_{x}^{-1}\varvec{X}_{i}\varvec{X}_{i}^{\top }\varvec{D}_{x}^{-1}\bigg \}\\&=\frac{1}{rn^{2}}\sum _{i=1}^{n}\bigg \{\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}}{\pi _{i}}\Vert \varvec{D}_{x}^{-1}\varvec{X}_{i}\Vert ^{2}\bigg \}\\&=\frac{1}{rn^{2}}\bigg \{\sum _{i=1}^{n}(\sqrt{\pi _{i}})^{2}\bigg \}\sum _{i=1}^{n}\bigg \{\frac{|w_{i}||y_{i}-\mu _i|}{\sqrt{\pi _{i}}}\Vert \varvec{D}_{x}^{-1}\varvec{X}_{i}\Vert \bigg \}^{2}\\&\ge \frac{1}{rn^{2}}\bigg (\sum _{i=1}^{n}\sqrt{\pi _{i}}\frac{|w_{i}||y_{i}-\mu _i|}{\sqrt{\pi _{i}}}\Vert \varvec{D}_{x}^{-1}\varvec{X}_{i}\Vert \bigg )^{2}. \end{aligned}$$
By the Cauchy–Schwarz inequality, equality holds if and only if \(\sqrt{\pi _{i}}\varpropto \frac{|w_{i}||y_{i}-\mu _i|}{\sqrt{\pi _{i}}}\Vert \varvec{D}_{x}^{-1}\varvec{X}_{i}\Vert \). Thus, when the SSP \(\pi _{i}^{\mathrm{{mmse}}}=\frac{\pi _{i}}{\sum _{j=1}^{n}\pi _{j}}=\frac{|w_{i}||y_{i}-\mu _i|\Vert \varvec{D}_{x}^{-1}\varvec{X}_{i}\Vert }{\sum _{j=1}^{n}|w_{j}||y_{j}-\mu _j|\Vert \varvec{D}_{x}^{-1}\varvec{X}_{j}\Vert }\), \(\mathrm{{tr}}(\varvec{V})\) achieves its minimum. \(\square \)
Proof of Theorem 4
From Theorem 2,
$$\begin{aligned}&\mathrm{{tr}}(\varvec{V}_{c})\\&\quad =\mathrm{{tr}}\bigg \{\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }}{\pi _{i}}\bigg \}\\&\quad =\frac{1}{rn^{2}}\sum _{i=1}^{n}\mathrm{{tr}}\bigg \{\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}}{\pi _{i}}\varvec{X}_{i}\varvec{X}_{i}^{\top }\bigg \}\\&\quad =\frac{1}{rn^{2}}\sum _{i=1}^{n}\bigg \{\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}}{\pi _{i}}\Vert \varvec{X}_{i}\Vert ^{2}\bigg \}\\&\quad =\frac{1}{rn^{2}}\bigg \{\sum _{i=1}^{n}(\sqrt{\pi _{i}})^{2}\bigg \}\sum _{i=1}^{n}\bigg \{\frac{|w_{i}||y_{i}-\mu _i|}{\sqrt{\pi _{i}}}\Vert \varvec{X}_{i}\Vert \bigg \}^{2}\\&\quad \ge \frac{1}{rn^{2}}\bigg \{\sum _{i=1}^{n}\sqrt{\pi _{i}}\frac{|w_{i}||y_{i}-\mu _i|}{\sqrt{\pi _{i}}}\Vert \varvec{X}_{i}\Vert \bigg \}^{2}. \end{aligned}$$
According to the Cauchy–Schwarz inequality, equality holds if and only if \(\sqrt{\pi _{i}}\varpropto \frac{|w_{i}||y_{i}-\mu _i|\Vert \varvec{X}_{i}\Vert }{\sqrt{\pi _{i}}}\). Thus, when the subsampling probability \(\pi _{i}^{\mathrm{{mV_c}}}=\frac{\pi _{i}}{\sum _{j=1}^{n}\pi _{j}}=\frac{|w_{i}||y_{i}-\mu _i|\Vert \varvec{X}_{i}\Vert }{\sum _{j=1}^{n}|w_{j}||y_{j}-\mu _j|\Vert \varvec{X}_{j}\Vert }\), \(\textrm{tr}(\varvec{V}_{c})\) achieves its minimum. \(\square \)
Proof of Lemma 2
For any integers \(j_1, j_2 \in [1,p]\), let
$$\begin{aligned} \widetilde{\varvec{D}}^{j_1,j_2}_x = \frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}x_{ij_{1}}^{*}x_{ij_{2}}^{*}-\varvec{S}_\lambda ^{j_{1}j_{2}}\bigg \} \end{aligned}$$
be the component of \(\widetilde{\varvec{D}}_x\).
$$\begin{aligned}&\textrm{Var}(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0})\\&\,\, =\textrm{Var}\bigg [\frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}x_{ij_{1}}^{*}x_{ij_{2}}^{*} - \varvec{S}_\lambda ^{j_{1}j_{2}}\bigg \}|\mathcal {F}_{n}\bigg ]\\&\,\,=\textrm{Var}\bigg \{\frac{1}{nr}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}x_{ij_{1}}^{*}x_{ij_{2}}^{*}\bigg \}\\&\,\,\le \frac{1}{r}\sum _{i=1}^{n}\pi _{i}(\widetilde{\varvec{\beta }}_{0})\bigg \{\frac{1}{n\pi _{i}(\widetilde{\varvec{\beta }}_{0})}w_{i}x_{ij_{1}}x_{ij_{2}}\\&\,\, - \frac{1}{nr}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg \}^{2}\\&\,\,=\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(x_{ij_{1}}x_{ij_{2}})^{2}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}-\frac{1}{r}\bigg (\frac{1}{nr}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg )^{2}\\&\,\,\le \frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}\Vert \varvec{X}_{i}\Vert ^{4}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}-\frac{1}{r}\bigg (\frac{1}{nr}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg )^{2}. \end{aligned}$$
Since \(w_{i}^{2}\le M_{1}\),
$$\begin{aligned}&\textrm{Var}(\widetilde{\varvec{D}}_{x}^{j_{1}j_{2}}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0})\\&\,\,\le \frac{M_{1}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{4}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}-\frac{1}{r}\bigg (\frac{1}{nr}\sum _{i=1}^{n}w_{i}x_{ij_{1}}x_{ij_{2}}\bigg )^{2}\\&\,\,=O_{P|\mathcal {F}_{n}}\left( r^{-1/2}\right) . \end{aligned}$$
Under Assumptions 1, 2 and 3, by Markov’s Inequality,
$$\begin{aligned}&P\bigg \{(\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}}-\varvec{D}_{x})^{2} \ge a\bigg \}\\&\,\,\le \frac{\textrm{E}\bigg (\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}}-\varvec{D}_{x}\bigg )^{2}}{a} =\frac{\textrm{Var}\bigg (\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}}|\mathcal {F}_{n}\bigg )}{a}\\&\,\,=O_{P|\mathcal {F}_{n}}\left( r^{-1/2}\right) , \end{aligned}$$
Because
$$\begin{aligned}&\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}\\&\quad =\frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*} - \varvec{S}_\lambda {\hat{\varvec{\beta }}}\bigg \},\\&\quad \textrm{E}\bigg \{\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \} =E_{\widetilde{\varvec{\beta }}_{0}}\bigg [\textrm{E}\bigg \{\frac{1}{n}\frac{\partial l^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg \}\bigg ]\\&\quad =E_{\widetilde{\varvec{\beta }}_{0}}\bigg \{\frac{1}{n}\frac{\partial l({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \}\\&\quad =\frac{1}{n}\frac{\partial l({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}\\&\quad =0. \end{aligned}$$
Moreover,
$$\begin{aligned}&\textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \}\\&\quad =\textrm{Var}\bigg [\frac{1}{n}\bigg \{\frac{1}{r}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*} \\&\qquad - \varvec{S}_\lambda {\hat{\varvec{\beta }}}\bigg \}|\mathcal {F}_{n}\bigg ]\\&\quad =\textrm{Var}\bigg \{\frac{1}{nr}\sum _{i=1}^{r}\frac{1}{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}|\mathcal {F}_{n}\bigg \}\\&\quad =\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{1}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }\\&\quad \le \frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}|y_{i}-\mu _i|^{2}\Vert \varvec{X}_{i}\Vert ^{2}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}. \end{aligned}$$
Note that \(w_{i}^{2}\le M_{1}\) and \(|y_{i}-\mu _i|^{2}\le M_{2}\). Thus,
$$\begin{aligned} \textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}|\mathcal {F}_{n}\bigg \} \le \frac{M_{1}M_{2}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{2}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})} =O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Given Assumption 2 holds, from Markov’s Inequality,
$$\begin{aligned}&P\bigg [\bigg \{\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}-\frac{1}{n}\frac{\partial l({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}\bigg \}^{2}\ge a\bigg ]\\&\qquad \le \frac{\textrm{Var}\bigg \{\frac{1}{n}\frac{\partial l_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{\partial \varvec{\beta }}\bigg \}}{a}=O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Equation (14) is proved. Thus, Lemma 2 is proved. \(\square \)
Proof of Theorem 5
For any \(0< u, v < 1\), the Taylor expansion of \(\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}(\breve{\varvec{\beta }}_j)\) at \({\hat{\varvec{\beta }}}\) is:
$$\begin{aligned} \dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}(\breve{\varvec{\beta }}_j)=\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}}_j)+\frac{\partial \dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}}_j)}{\partial \varvec{\beta }_j}(\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j)+R_{\widetilde{\varvec{\beta }}_{0}}=0, \end{aligned}$$
where
$$\begin{aligned} R_{\widetilde{\varvec{\beta }}_{0}} =&(\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j)^{\top }\\&\times \int _{0}^{1}\int _{0}^{1}\frac{\partial ^{2}\dot{l}_{\widetilde{\varvec{\beta }}_{0},j}^{*}\{{\hat{\varvec{\beta }}}_j+uv(\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j)\}}{\partial \varvec{\beta }_j\partial \varvec{\beta }^{\top }_j} v\textrm{d}u\textrm{d}v\\&\times (\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j). \end{aligned}$$
According to Chapter 4 of Ferguson (1996), \(\bigg \Vert \frac{\partial ^{2}\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}(\varvec{\beta })}{\partial \varvec{\beta }\partial \varvec{\beta }^{\top }}\bigg \Vert =0\) is true for \(\forall \varvec{\beta }\). Thus,
$$\begin{aligned}&\bigg \Vert \int _{0}^{1}\int _{0}^{1}\frac{\partial ^{2}\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}\{{\hat{\varvec{\beta }}}_j+uv(\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j)\}}{\partial \varvec{\beta }_j\partial \varvec{\beta }^{\top }_j} v\textrm{d}u\textrm{d}v \bigg \Vert \\&\,\,\le \int _{0}^{1}\int _{0}^{1} \bigg \Vert \frac{\partial ^{2}\dot{l}_{\widetilde{\varvec{\beta }}_{0 },j}^{*}\{{\hat{\varvec{\beta }}}_j+uv(\breve{\varvec{\beta }}_j-{\hat{\varvec{\beta }}}_j)\}}{\partial \varvec{\beta }_j\partial \varvec{\beta }^{\top }_j}\bigg \Vert v\textrm{d}u\textrm{d}v\\&\,\,=0. \end{aligned}$$
Using the result from Lemma 2, we can get
$$\begin{aligned} \breve{\varvec{\beta }}-{\hat{\varvec{\beta }}}=-\bigg (\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}}\bigg )^{-1}\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0},j}^{*}({\hat{\varvec{\beta }}})}{n}=O_{P|\mathcal {F}_{n}}(r^{-1/2}). \end{aligned}$$
\(\square \)
Proof of Theorem 6
Note that
$$\begin{aligned}&\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n}\\&\,\,=\frac{1}{r}\sum _{i=1}^{r}\frac{1}{n\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*} - \frac{1}{n}\varvec{S}_\lambda {\hat{\varvec{\beta }}}\\&\,\,=\frac{1}{r}\sum _{i=1}^{r}\bigg \{\frac{1}{n\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*} - \frac{1}{n}\varvec{S}_\lambda {\hat{\varvec{\beta }}}\bigg \}\\ :=&\frac{1}{r}\sum _{i=1}^{r}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}, \end{aligned}$$
Furthermore, by Lemma 2,
$$\begin{aligned} & \textrm{E}\bigg \{\frac{1}{n}\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})\bigg \}=0,\\ & \textrm{Var}\bigg \{\frac{1}{n}\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})\bigg \}=\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}=O_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Conditional on \(\mathcal {F}_{n}\) and \(\widetilde{\varvec{\beta }}_{0}\), \(\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\) are independent and identically distributed and \(\textrm{E}\bigg (\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg )=0\), \(i=1,2,...,r\). Thus, one can further derive that \(\textrm{Var}\bigg (\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg )=r\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}=O_{P|\mathcal {F}_{n}}(1)\), \(i=1,2,...,r\). Moreover, for \(\forall \varepsilon >0\), there exists \(\tau >0\) such that
$$\begin{aligned} \sum _{i=1}^{r}\textrm{E}\bigg \{&\Vert r^{-1/2}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert ^{2}I(\Vert r^{-1/2}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert>\varepsilon )|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg \}\\ =\sum _{i=1}^{r}\textrm{E}\bigg \{&\Vert r^{-1/2}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert ^{2}I(\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert>r^{1/2}\varepsilon )|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg \}\\ \le \sum _{i=1}^{r}\textrm{E}\bigg \{&\Vert r^{-1/2}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert ^{2}\bigg (\frac{\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert }{r^{1/2}\varepsilon }\bigg )^{\tau }I(\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert \\&>r^{1/2}\varepsilon )|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg \}\\ =\frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}&\sum _{i=1}^{r}\textrm{E}\bigg \{\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert ^{2+\tau }I(\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert \\&>r^{1/2}\varepsilon )|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg \}\\ \le \frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}&\textrm{E}\bigg (\sum _{i=1}^{r}\Vert \varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\Vert ^{2+\tau }\bigg )\\ =\frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}&\textrm{E}\bigg \{\sum _{i=1}^{r}\bigg \Vert \frac{1}{n\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})}w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*} \\&- \frac{1}{n}\varvec{S}_\lambda {\hat{\varvec{\beta }}}\bigg \Vert ^{2+\tau }\bigg \}\\ \le \frac{1}{r^{1+\frac{\tau }{2}}\varepsilon ^{\tau }}&\sum _{i=1}^{r}\bigg \{\frac{\Vert w_{i}^{*}(y_{i}^{*}-\mu _i)\varvec{X}_{i}^{*}\Vert ^{2+\tau }}{n^{2+\tau }\{\pi _{i}^{*}(\widetilde{\varvec{\beta }}_{0})\}^{2+\tau }}\\&+\frac{\Vert \varvec{S}_\lambda {\hat{\varvec{\beta }}}\Vert ^{2+\tau }}{n^{2+\tau 
}}\bigg \}\\ =\frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{n}&\frac{\Vert w_{i}(y_{i}-\mu _i)\varvec{X}_{i}\Vert ^{2+\tau }}{n^{2+\tau }\{\pi _{i}(\widetilde{\varvec{\beta }}_{0})\}^{1+\tau }} \\&+\frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\frac{\Vert \varvec{S}_\lambda {\hat{\varvec{\beta }}}\Vert ^{2+\tau }}{n^{2+\tau }}\\ \le \frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\sum _{i=1}^{n}&\frac{|w_{i}|^{2+\tau }|y_{i}-\mu _i|^{2+\tau }\Vert \varvec{X}_{i}\Vert ^{2+\tau }}{n^{2+\tau }\{\pi _{i}(\widetilde{\varvec{\beta }}_{0})\}^{1+\tau }}\\&+\frac{1}{r^{\frac{\tau }{2}}\varepsilon ^{\tau }}\frac{\Vert \varvec{S}_\lambda \Vert ^{2+\tau }\Vert {\hat{\varvec{\beta }}}\Vert ^{2+\tau }}{n^{2+\tau }}. \end{aligned}$$
Given Assumption 3, by the Lindeberg-Feller central limit theorem,
$$\begin{aligned}&\frac{1}{n}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})\nonumber \\&\quad =r^{-\frac{1}{2}}\bigg \{\textrm{Var}\bigg (\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}|\mathcal {F}_{n},\widetilde{\varvec{\beta }}_{0}\bigg )\bigg \}^{-\frac{1}{2}}\sum _{i=1}^{r}\varvec{\eta }_{i}^{\widetilde{\varvec{\beta }}_{0}}\xrightarrow []{d}N(\textbf{0},\varvec{I}). \end{aligned}$$
(7)
By Theorem 5, we have
$$\begin{aligned} (\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\breve{\varvec{\beta }}-{\hat{\varvec{\beta }}})=O_{P|\mathcal {F}_{n}}(1). \end{aligned}$$
By Lemma 2 and Theorem 5,
$$\begin{aligned}&(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\breve{\varvec{\beta }}-{\hat{\varvec{\beta }}})\\&\quad =-(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}})^{-1}\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n}\\&\quad =-(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n} \\&\qquad +\bigg [-(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\bigg \{(\widetilde{\varvec{D}}_{x}^{\widetilde{\varvec{\beta }}_{0}})^{-1}-\varvec{D}_{x}^{-1}\bigg \}\bigg ]\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n}\\&\quad =-(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n} +O_{P|\mathcal {F}_{n}}(r^{-1/2})\\&\quad =-(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{1/2}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\frac{\dot{l}_{\widetilde{\varvec{\beta }}_{0}}^{*}({\hat{\varvec{\beta }}})}{n} \\&\quad \quad +O_{P|\mathcal {F}_{n}}(r^{-1/2}). \end{aligned}$$
Furthermore,
$$\begin{aligned}&\textrm{Var}\bigg \{(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{1/2}\bigg \}\\&\quad =(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{1/2}\bigg \{(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}\\&\quad \quad (\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{1/2}\bigg \}^{\top }\\&\quad =(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}\varvec{V}_{c}\varvec{D}_{x}^{-1}(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2} \\&\quad \quad +(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}-\varvec{V}_{c})\varvec{D}_{x}^{-1}(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\\&\quad =(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2} \\&\quad \quad +(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}-\varvec{V}_{c})\varvec{D}_{x}^{-1}(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\\&\quad =\varvec{I}+(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}-\varvec{V}_{c})\varvec{D}_{x}^{-1}(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}. \end{aligned}$$
Note that
$$\begin{aligned}&\Vert \varvec{V}_{c}-\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}\Vert =\bigg \Vert \frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }}{\pi _{i}^{\mathrm{{mV_c}}}}\\&\quad -\frac{1}{rn^{2}}\sum _{i=1}^{n}\frac{w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}\bigg \Vert \\&\quad =\bigg \Vert \frac{1}{rn^{2}}\sum _{i=1}^{n}w_{i}^{2}(y_{i}-\mu _i)^{2}\varvec{X}_{i}\varvec{X}_{i}^{\top }\\&\qquad \bigg (\frac{1}{\pi _{i}^{\mathrm{{mV_c}}}}-\frac{1}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}\bigg )\bigg \Vert \\&\quad \le \frac{1}{rn^{2}}\sum _{i=1}^{n}w_{i}^{2}|y_{i}-\mu _i|^{2}\Vert \varvec{X}_{i}\Vert ^{2}\bigg \Vert \frac{1}{\pi _{i}^{\mathrm{{mV_c}}}}-\frac{1}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}\bigg \Vert . \end{aligned}$$
Since \(w_{i}^{2}\le M_{1}\) and \(|y_{i}- \mu _i|^{2}\le M_{2}\),
$$\begin{aligned} \Vert \varvec{V}_{c}-\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}\Vert \le \frac{M_{1}M_{2}}{rn^{2}}\sum _{i=1}^{n}\frac{\Vert \varvec{X}_{i}\Vert ^{2}}{\pi _{i}^{\mathrm{{mV_c}}}}\bigg \Vert 1-\frac{\pi _{i}^{\mathrm{{mV_c}}}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}\bigg \Vert , \end{aligned}$$
where
$$\begin{aligned} \bigg \Vert 1-\frac{\pi _{i}^{\mathrm{{mV_c}}}}{\pi _{i}(\widetilde{\varvec{\beta }}_{0})}\bigg \Vert ^{2}&=\frac{1}{|\pi _{i}(\widetilde{\varvec{\beta }}_{0})|^{2}}\Vert \pi _{i}(\widetilde{\varvec{\beta }}_{0})-\pi _{i}^{\mathrm{{mV_c}}}\Vert ^{2}\\&\le \frac{\Vert \pi _{i}(\widetilde{\varvec{\beta }}_{0})-\pi _{i}^{\mathrm{{mV_c}}}\Vert ^{2}}{k^{2}}=o_{P}(1). \end{aligned}$$
Thus, according to Assumption 3,
$$\begin{aligned} \Vert \varvec{V}_{c}-\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}}\Vert = o_{P|\mathcal {F}_{n}}(r^{-1}). \end{aligned}$$
Thus,
$$\begin{aligned} \textrm{Var}\bigg \{(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}\varvec{D}_{x}^{-1}(\varvec{V}_{c}^{\widetilde{\varvec{\beta }}_{0}})^{1/2}\bigg \}=\varvec{I}+o_{P|\mathcal {F}_{n}}(1). \end{aligned}$$
Thus, by Eq. (7),
$$\begin{aligned} \textrm{Var}\bigg \{(\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\breve{\varvec{\beta }}-{\hat{\varvec{\beta }}})\bigg \} = \varvec{I}. \end{aligned}$$
By Slutsky's theorem and Eq. (7),
$$\begin{aligned} (\varvec{V}^{\widetilde{\varvec{\beta }}_{0}})^{-1/2}(\breve{\varvec{\beta }}-{\hat{\varvec{\beta }}})\xrightarrow []{d} N(\textbf{0},\varvec{I}). \end{aligned}$$
\(\square \)