\documentclass[sigconf,screen]{acmart}
\usepackage{graphicx}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
\acmConference[SIGMOD '26]{ACM SIGMOD International Conference on Management of Data}{June 2026}{City, Country}
\acmISBN{978-1-4503-XXXX-X/26/06}
\acmDOI{10.1145/XXXXXXX.XXXXXXX}
\setcopyright{acmlicensed}
\copyrightyear{2026}
\acmYear{2026}
\begin{document}
\title{Lumos: Cost-Aware Adaptive Indexing for Cloud OLAP}
\author{First Last}
\affiliation{\institution{University of Example}\country{Country}}
\email{first.last@example.org}
\author{Jane Doe}
\affiliation{\institution{Example Research Labs}\country{Country}}
\email{jane.doe@example.org}
\author{John Smith}
\affiliation{\institution{University of Example}\country{Country}}
\email{john.smith@example.org}
\renewcommand{\shortauthors}{Last et al.}
\begin{abstract}
Cloud OLAP workloads expose a new cost structure: storage is cheap but
scans are expensive, and query shapes shift faster than static indexes
can adapt. Lumos is an adaptive indexing system that continuously
rebuilds data layouts based on observed query patterns and a cost model
tuned for object storage. On TPC-H at 10\,TB, Lumos reduces scan bytes
by 4.3$\times$ and query cost by 3.1$\times$ versus partitioned Parquet.
\end{abstract}
\begin{CCSXML}
<ccs2012><concept><concept_id>10002951.10002952</concept_id>
<concept_desc>Information systems~Data layout</concept_desc>
<concept_significance>500</concept_significance></concept></ccs2012>
\end{CCSXML}
\ccsdesc[500]{Information systems~Data layout}
\keywords{adaptive indexing, OLAP, cloud databases}
\maketitle
\section{Introduction}
Modern OLAP systems on cloud storage pay to scan rather than to update,
creating an opportunity for continuous, cost-aware layout optimization.
\section{Background}
We briefly review the relevant background: Z-ordering, data skipping,
LSM trees, and database cracking.
\section{Design}
Lumos continuously maintains a small set of candidate layouts and uses
observed queries to evaluate their relative value. The cost model is trained
online from actual scan costs.
\subsection{Cost Model}
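The cost model scores a query $Q$ against a candidate layout $L$ as
% TODO(review): define \alpha, \beta, scan, and seek in the text.
\begin{equation}
\label{eq:cost}
c(Q, L) = \alpha \cdot \operatorname{scan}(Q, L) + \beta \cdot \operatorname{seek}(Q, L).
\end{equation}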
\section{Implementation}
Lumos is 28{,}000 lines of Go on top of Apache Iceberg tables.
\section{Evaluation}
\begin{table}[t]
\centering
\caption{TPC-H at 10\,TB, averaged over 22 queries.}
\label{tab:layouts}
\begin{tabular}{lrr}
\toprule
Layout & Scan bytes (GB) & \$/query \\
\midrule
Partitioned Parquet & 420 & 1.24 \\
Z-order & 210 & 0.78 \\
Liquid Clustering & 150 & 0.61 \\
\textbf{Lumos} & \textbf{97} & \textbf{0.40} \\
\bottomrule
\end{tabular}
\end{table}
\section{Related Work}
Lumos relates to prior work on adaptive indexing, learned indexes, and
cost models.
\section{Conclusion}
Cloud-era OLAP deserves layouts that evolve with the workload.
\bibliographystyle{ACM-Reference-Format}
\bibliography{refs}
\end{document}