% Document class: 12pt, A4, one-sided report (the class that provides \chapter).
\documentclass[12pt,a4paper,oneside]{report}
% Encoding: T1 output font encoding and UTF-8 source input.
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
% Page layout: explicit margins; the left margin (3cm) is wider than the others (2.5cm).
\usepackage[a4paper,left=3cm,right=2.5cm,top=2.5cm,bottom=2.5cm]{geometry}
% Line-spacing commands; \onehalfspacing below comes from this package.
\usepackage{setspace}
% Graphics inclusion, math environments/symbols, and the \toprule/\midrule/\bottomrule table rules.
\usepackage{graphicx}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
% Citations: natbib with the `round` option (\citep is used in the body).
\usepackage[round]{natbib}
% hyperref is loaded after all other packages; `hidelinks` removes link boxes and colors.
\usepackage[hidelinks]{hyperref}
% Apply one-and-a-half line spacing to the document.
\onehalfspacing
\begin{document}
% Title page: institution, thesis title, author details, and date, all centered.
\begin{titlepage}
\begin{center}
% Starred \vspace so the space is kept at the top of the page.
\vspace*{2cm}
{\large\bfseries University of Example}\\[0.3cm]
{\large Faculty of Science and Engineering}\\[5cm]
{\LARGE\bfseries Attention-Based Models for Time-Series Forecasting}\\[0.6cm]
{\Large Master's Thesis}\\[3cm]
% Author details: right-aligned labels, left-aligned values.
\begin{tabular}{rl}
\textbf{Author:} & First Last \\
\textbf{Student ID:} & 12345678 \\
% "~" and "\ " after the abbreviation periods: a plain space after "Dr." or
% "M.Sc." would be typeset as a (wider) end-of-sentence space.
\textbf{Supervisor:} & Dr.~Example Supervisor \\
\textbf{Second Reader:} & Dr.~Example Reader \\
\textbf{Program:} & M.Sc.\ Computer Science \\
\end{tabular}\\[4cm]
{\large\today}
\end{center}
\end{titlepage}
% Declaration of originality, set as an unnumbered chapter.
\chapter*{Declaration}
I declare that this thesis is my own work and has not been submitted elsewhere.

% The blank line above ends the declaration paragraph. Without it, \vspace and
% \noindent would sit in the middle of that paragraph: the signature would run
% on in the same paragraph and \noindent would have no effect.
\vspace{3em}
\noindent First Last, \today.
\chapter*{Abstract}
Time-series forecasting is a central problem in many scientific domains.
In this thesis, we develop a family of attention-based models for long-horizon
forecasting and show that they achieve state-of-the-art results on six benchmarks
while being $3\times$ more compute-efficient than prior transformers.
\tableofcontents
\chapter{Introduction}
\section{Motivation}
Accurate forecasting drives decisions in finance, energy, and health.
\section{Research Questions}
\begin{enumerate}
\item Can attention match recurrence on long sequences at lower cost?
\item How do we regularize attention to avoid overfitting on small datasets?
\item What are the failure modes on non-stationary series?
\end{enumerate}
\chapter{Related Work}
Prior work ranges from classical ARIMA~\citep{box2015time} to modern deep models such as the Transformer~\citep{vaswani2017attention}.
\chapter{Methodology}
Given a series $x_{1:T}$, we predict $x_{T+1:T+H}$ using a block of
attention layers with logarithmic sparsity.
\chapter{Experiments}
% Results table: MAPE per method (rows) and benchmark (columns).
% The bold row (Ours) has the lowest value in every column.
% [htbp] instead of a lone [h]: gives LaTeX fallback placements and avoids the
% "`h' float specifier changed to `ht'" warning.
\begin{table}[htbp]
\centering
% Caption above the table; the label follows the caption so that references
% resolve to the table number.
\caption{MAPE across benchmarks.}
\label{tab:mape}
% @{} strips the outer column padding so the booktabs rules align with the text.
\begin{tabular}{@{}lcccc@{}}
\toprule
Method        & ETT          & ECL          & Traffic      & Weather      \\
\midrule
ARIMA         & 7.8          & 6.5          & 11.2         & 5.4          \\
LSTM          & 6.1          & 5.9          & 9.8          & 4.6          \\
Transformer   & 5.2          & 5.1          & 8.4          & 4.1          \\
\textbf{Ours} & \textbf{4.6} & \textbf{4.7} & \textbf{7.3} & \textbf{3.5} \\
\bottomrule
\end{tabular}
\end{table}
\chapter{Discussion}
The gains come chiefly from the sparsity pattern.
\chapter{Conclusion}
We have demonstrated a practical and effective forecasting architecture.
% References, written by hand in natbib's author--year form:
%   \bibitem[Short authors(Year)]{key}
% NOTE(review): no \bibliography{...} command is visible here, and
% \bibliographystyle normally only matters for BibTeX-generated lists --
% confirm whether the line below is still needed.
\bibliographystyle{plainnat}
\begin{thebibliography}{9}

\bibitem[Box et al.(2015)]{box2015time}
  G.~Box et al.
  \emph{Time Series Analysis}.
  Wiley, 2015.

\bibitem[Vaswani et al.(2017)]{vaswani2017attention}
  A.~Vaswani et al.
  Attention Is All You Need.
  NeurIPS, 2017.

\end{thebibliography}
\end{document}