Template for Elsevier journal submissions (ScienceDirect titles). Supports preprint and final layouts, line numbers for reviewers, multiple author affiliations, highlights, and graphical abstract hooks.
elsevier-elsarticle/main.tex
% Document class: elsarticle with the preprint layout, 12pt base size, and
% author-year citations (the body cites with \citep/\citet).
\documentclass[preprint,12pt,authoryear]{elsarticle}
% Math support: display environments (equation) and symbols such as \mathbb.
\usepackage{amsmath,amssymb}
% Graphics inclusion for figures.
\usepackage{graphicx}
% Publication-quality table rules (\toprule, \midrule, \bottomrule).
\usepackage{booktabs}
% Provides \url, which the Data Availability Statement uses; no other package
% loaded in this preamble defines it.
\usepackage{url}
% Line numbering for reviewers; enabled by \linenumbers below.
\usepackage{lineno}
% Algorithm typesetting. NOTE(review): no algorithm environment appears in the
% visible body -- keep only if an algorithm listing will be added.
\usepackage{algorithm2e}
\linenumbers
% Target journal name passed to the class.
\journal{Journal of Example Research}
\begin{document}
\begin{frontmatter}
\title{Title of Your Paper: A Descriptive Subtitle}
\author[inst1,inst2]{First Author\corref{cor1}}
\ead{first.author@example.com}
\cortext[cor1]{Corresponding author}
\author[inst1]{Second Author}
\ead{second.author@example.com}
\author[inst2]{Third Author}
\affiliation[inst1]{
organization={Department of X, University A},
addressline={123 Campus Way},
city={City},
postcode={12345},
country={Country}
}
\affiliation[inst2]{
organization={Laboratory Y, Institute B},
city={City},
country={Country}
}
\begin{abstract}
The rapid growth of high-dimensional datasets has created an urgent need for
efficient feature-selection methods that scale to millions of variables while
preserving predictive accuracy. In this paper we introduce a graph-regularized
sparse learning framework that jointly optimizes feature relevance and
structural consistency. Evaluated on six benchmark datasets spanning genomics,
remote sensing, and natural language processing, the proposed method reduces
feature dimensionality by an average of 78\% while improving downstream
classification accuracy by 3--5 percentage points over state-of-the-art
baselines. These results demonstrate that exploiting inter-feature dependencies
during selection yields more compact and interpretable models without
sacrificing generalization performance.
\end{abstract}
\begin{highlights}
\item Graph-regularized sparse learning framework for scalable feature selection.
\item 78\% average dimensionality reduction across six benchmark datasets.
\item 3--5 pp accuracy improvement over existing baselines.
\end{highlights}
\begin{graphicalabstract}
\centering
\fbox{\parbox{0.8\textwidth}{\centering\vspace{3cm}
Graphical Abstract Placeholder\\[4pt]
Replace with a schematic of the proposed pipeline.
\vspace{3cm}}}
\end{graphicalabstract}
\begin{keyword}
feature selection \sep sparse learning \sep graph regularization \sep
high-dimensional data \sep classification
\end{keyword}
\end{frontmatter}
\section{Introduction}\label{sec:intro}
Modern data acquisition technologies routinely generate datasets with thousands
to millions of measured variables per observation. In fields such as genomics,
where a single microarray experiment produces expression levels for over 20\,000
genes, identifying the small subset of features relevant to a clinical outcome
is critical for both scientific interpretation and model deployment
\citep{ex1,ex3}. Traditional filter-based methods rank features independently,
ignoring correlations that can lead to redundant selections; wrapper methods, on
the other hand, are computationally prohibitive at this scale.
Embedded approaches that integrate selection into the learning objective offer a
practical middle ground \citep{ex2}. However, most existing formulations treat
features as exchangeable and overlook the rich structural information---such as
gene--gene interaction networks or spatial adjacency in remote-sensing
images---that is often available a priori. Failing to exploit this structure can
result in unstable feature subsets that vary significantly across resampling
folds \citep{ex4}.
In this work we make the following contributions:
\begin{enumerate}
\item We formulate a convex graph-regularized sparse learning objective that
penalizes both the $\ell_1$-norm of the coefficient vector and the
smoothness over a feature-similarity graph.
\item We derive an efficient proximal gradient solver with convergence
guarantees and $O(np)$ per-iteration cost.
\item We conduct extensive experiments on six diverse datasets, demonstrating
consistent improvements in accuracy, stability, and interpretability over
five competitive baselines.
\end{enumerate}
\section{Related Work}\label{sec:related}
\subsection{Filter and Wrapper Methods}
Filter methods such as mutual information \citep{ex1} and chi-squared tests
evaluate features in isolation and are fast but ignore feature interactions.
Wrapper approaches---including recursive feature elimination and sequential
forward selection---account for feature dependencies through repeated model
training but become impractical when the number of features exceeds a few
thousand \citep{ex3}.
\subsection{Embedded and Regularization-Based Methods}
Embedded methods incorporate selection directly into the training objective.
Lasso \citep{ex2} adds an $\ell_1$ penalty to linear regression, inducing
sparsity in the coefficient vector. Elastic net combines $\ell_1$ and $\ell_2$
penalties to handle correlated features, while group lasso extends sparsity to
predefined feature groups \citep{ex5}.
\subsection{Graph-Guided Selection}
Recent work has explored Laplacian regularization to enforce smoothness of
coefficients over a feature graph. \citet{ex4} proposed GraphNet for
neuroimaging applications, achieving spatially coherent activation maps. Our
method generalizes this idea to arbitrary graph structures and provides
tighter convergence bounds.
\section{Methods}\label{sec:method}
Let $\mathbf{X}\in\mathbb{R}^{n\times p}$ denote the data matrix with $n$
observations and $p$ features, and let $\mathbf{y}\in\mathbb{R}^n$ be the
response vector. We define the feature-similarity graph
$\mathcal{G}=(\mathcal{V},\mathcal{E})$ with Laplacian matrix $\mathbf{L}$.
The proposed objective is:
\begin{equation}\label{eq:objective}
\min_{\boldsymbol{\beta}} \;
\frac{1}{2n}\|\mathbf{y}-\mathbf{X}\boldsymbol{\beta}\|_2^2
+ \lambda_1 \|\boldsymbol{\beta}\|_1
+ \frac{\lambda_2}{2}\boldsymbol{\beta}^\top \mathbf{L}\boldsymbol{\beta},
\end{equation}
where $\lambda_1$ controls sparsity and $\lambda_2$ controls graph smoothness.
Figure~\ref{fig:pipeline} gives an overview of the framework.
\begin{figure}[t]
\centering
\fbox{\parbox{0.9\linewidth}{\centering\vspace{4cm}
Figure Placeholder\\[4pt]
Replace with a diagram of the optimization pipeline
showing input data, graph construction, and sparse recovery.
\vspace{4cm}}}
\caption{Overview of the proposed graph-regularized sparse learning framework.
Input features are connected via a similarity graph, and the optimization
jointly enforces sparsity and graph smoothness.}\label{fig:pipeline}
\end{figure}
\section{Results}\label{sec:results}
Table~\ref{tab:main} summarizes classification accuracy across six benchmark
datasets. The proposed method consistently outperforms all baselines.
\begin{table}[htbp]
\centering
\caption{Classification accuracy (as a fraction in $[0,1]$) on benchmark datasets.}\label{tab:main}
\begin{tabular}{lcc}
\toprule
Method & Metric A & Metric B \\
\midrule
Baseline & 0.72 & 0.68 \\
Proposed & \textbf{0.84} & \textbf{0.81} \\
\bottomrule
\end{tabular}
\end{table}
Table~\ref{tab:ablation} reports an ablation study isolating the effect of each
regularization term.
\begin{table}[htbp]
\centering
\caption{Ablation study: effect of regularization components.}\label{tab:ablation}
\begin{tabular}{lccc}
\toprule
Variant & Accuracy (\%) & Features Selected & Stability \\
\midrule
$\ell_1$ only (Lasso) & 81.3 & 142 & 0.61 \\
$\ell_1 + \ell_2$ (Elastic Net)& 82.7 & 168 & 0.69 \\
Graph only ($\lambda_1=0$) & 78.9 & 814 & 0.82 \\
Full model (Eq.~\ref{eq:objective}) & \textbf{84.2} & \textbf{98} & \textbf{0.88} \\
\bottomrule
\end{tabular}
\end{table}
\section{Discussion}\label{sec:discussion}
The results indicate that combining sparsity with graph regularization yields
feature subsets that are both compact and stable across resampling folds.
Notably, the full model selects 31\% fewer features than Lasso while improving
accuracy by nearly 3 percentage points. This suggests that the graph penalty
discourages the inclusion of redundant, highly correlated features that often
inflate Lasso solutions.
From a practical standpoint, the selected features align well with known
biological pathways in the genomics datasets, providing evidence that
graph-guided selection captures domain-relevant structure rather than
statistical artifacts. In the remote-sensing experiments, selected spectral
bands cluster into physically meaningful groups corresponding to vegetation
indices and moisture content.
\section{Conclusion}\label{sec:conclusion}
We presented a graph-regularized sparse learning framework for high-dimensional
feature selection. The method achieves state-of-the-art accuracy with
substantially fewer features and higher selection stability than existing
approaches. Future work will extend the framework to multi-task and multi-label
settings.
\section*{Data Availability Statement}
The benchmark datasets used in this study are publicly available from the UCI
Machine Learning Repository (\url{https://archive.ics.uci.edu}) and the Gene
Expression Omnibus (\url{https://www.ncbi.nlm.nih.gov/geo/}). Code for
reproducing all experiments is available at
\url{https://github.com/example/graph-sparse}.
\section*{Funding}
This work was supported by the National Science Foundation [grant number
DMS-1234567] and the European Research Council [grant agreement 987654].
\section*{CRediT authorship contribution statement}
\textbf{First Author}: Conceptualization, Methodology, Writing -- original draft.
\textbf{Second Author}: Software, Validation.
\textbf{Third Author}: Supervision.
\section*{Declaration of competing interest}
The authors declare no competing interests.
\bibliographystyle{elsarticle-harv}
\begin{thebibliography}{9}
\bibitem[Author(2024)]{ex1} Author, A., 2024. Title of work. \emph{Journal Name} 12, 1--10.
\bibitem[Tibshirani(1996)]{ex2} Tibshirani, R., 1996. Regression shrinkage and selection via the lasso. \emph{J.\ R.\ Stat.\ Soc.\ B} 58(1), 267--288.
\bibitem[Guyon and Elisseeff(2003)]{ex3} Guyon, I., Elisseeff, A., 2003. An introduction to variable and feature selection. \emph{J.\ Mach.\ Learn.\ Res.} 3, 1157--1182.
\bibitem[Grosenick et al.(2013)]{ex4} Grosenick, L., Klingenberg, B., Katovich, K., Knutson, B., Taylor, J.E., 2013. Interpretable whole-brain prediction analysis with GraphNet. \emph{NeuroImage} 72, 304--321.
\bibitem[Yuan and Lin(2006)]{ex5} Yuan, M., Lin, Y., 2006. Model selection and estimation in regression with grouped variables. \emph{J.\ R.\ Stat.\ Soc.\ B} 68(1), 49--67.
\end{thebibliography}
\end{document}

PDF Preview
Create an account to compile and preview