Build an Expo app
Hands-on tutorial on using QVAC SDK with Expo.
What we'll build
We'll build an LLM chat mobile application using the following stack:
- Expo for building and running a React Native app; and
- QVAC to run LLM inference locally.
Prerequisites
- npm v10.9
- Linux/macOS (Windows with small adjustments)
- Mobile development environment with a physical device (iOS or Android)
Due to limitations with llamacpp, QVAC currently does not run on emulators.
You must use a physical device.
On Windows
Some commands are Bash‑specific. On Windows, use PowerShell/WSL or adapt them.
Step 1: set up your development environment
Follow Expo's official documentation to set it up on your machine:
Confirm you can run the default template on your device.
Step 2: set up an Expo project
Let's use the official Expo scaffold to create a minimal app structure.
Create a new project:
npx create-expo-app@latest qvac-expo-chat --template blank-typescript
cd qvac-expo-chat
This creates a minimal TypeScript Expo project with a single App.tsx entry point.
Start the Expo dev server:
npx expo start
Then run the app on your physical device following Expo's official workflow.
At this point, you should see the default Expo screen rendered on your phone. This verifies your Expo setup before we add QVAC.
Step 3: install QVAC
In this step we'll add the QVAC SDK dependency and complete the Expo-specific installation steps required for running local inference on a physical mobile device.
Install the SDK:
npm i @qvac/sdk
Step 4: make a smoke test
Before creating the chat UI, we'll run a smoke test to validate the full QVAC lifecycle in an Expo app.
Replace the contents of App.tsx with the following code:
import React, { useEffect, useState } from "react";
import { SafeAreaView, StyleSheet, Text, View } from "react-native";
import {
completion,
downloadAsset,
LLAMA_3_2_1B_INST_Q4_0,
loadModel,
unloadModel,
VERBOSITY,
} from "@qvac/sdk";
// Smoke-test component: downloads the model, loads it, runs one streaming
// completion, and renders status + streamed output. Exercises the full QVAC
// lifecycle (downloadAsset -> loadModel -> completion -> unloadModel).
export default function App() {
  // UI state for progress and the streamed output text.
  const [status, setStatus] = useState("Starting…");
  const [output, setOutput] = useState("");

  useEffect(() => {
    // Track model lifecycle and cancellation across async steps.
    // `modelId` is a plain local (not state) so the cleanup closure below
    // sees the value assigned by the async IIFE.
    let modelId: string | null = null;
    let cancelled = false;

    (async () => {
      try {
        // 1) Ensure the model file is available on device.
        setStatus("Downloading model…");
        await downloadAsset({ assetSrc: LLAMA_3_2_1B_INST_Q4_0 });
        if (cancelled) return;

        // 2) Load the model into memory.
        // NOTE(review): if the component unmounts while loadModel is in
        // flight, cleanup has already run with modelId === null, so the
        // model that finishes loading afterwards is never unloaded —
        // acceptable for a smoke test, but worth knowing.
        setStatus("Loading model…");
        modelId = await loadModel({
          modelSrc: LLAMA_3_2_1B_INST_Q4_0,
          modelType: "llm",
          modelConfig: {
            device: "gpu",
            ctx_size: 2048,
            verbosity: VERBOSITY.ERROR,
          },
        });
        if (cancelled) return;

        // 3) Run a streaming completion and update UI as tokens arrive.
        setStatus("Running completion…");
        const result = completion({
          modelId,
          history: [{ role: "user", content: "Say hello in one short sentence." }],
          stream: true,
        });
        let acc = "";
        // `cancelled` only suppresses state updates after unmount; the token
        // stream itself keeps draining until the model finishes.
        for await (const token of result.tokenStream) {
          acc += token;
          if (!cancelled) setOutput(acc);
        }
        setStatus("Done ✅");
      } catch (e: any) {
        // Surface any lifecycle error in the status line.
        setStatus(`Error: ${e?.message ?? String(e)}`);
      }
    })();

    return () => {
      // Cleanup: cancel any in-flight work and unload the model.
      // React cleanup can't be async, so the unload is fire-and-forget.
      cancelled = true;
      if (modelId) {
        void unloadModel({ modelId, clearStorage: false }).catch(() => {});
      }
    };
  }, []);

  return (
    <SafeAreaView style={styles.safe}>
      {/* Minimal UI for the smoke test: title, status, streamed output. */}
      <View style={styles.container}>
        <Text style={styles.h1}>QVAC Smoke Test</Text>
        <Text style={styles.status}>{status}</Text>
        <Text style={styles.output}>{output}</Text>
      </View>
    </SafeAreaView>
  );
}
// Basic dark theme layout for the smoke-test screen.
const styles = StyleSheet.create({
  safe: { flex: 1, backgroundColor: "#0B0B0F" },
  container: { flex: 1, padding: 16, gap: 12 },
  h1: { color: "white", fontSize: 18, fontWeight: "600" },
  status: { color: "#A7A7B3" },
  output: { color: "white", fontSize: 16, lineHeight: 22 },
});

Run the app on your physical device:
# From the project root:
npx expo run:android --device
# or
npx expo run:ios --device
On the first run, the model may take a while to download and load. Keep an eye on the terminal logs. To confirm the smoke test worked, you should see:
- A status line progressing through Downloading model…, Loading model…, and Running completion…
- A short assistant output streaming into the UI
- A final status of Done ✅
Step 5: add the chat UI
Now that QVAC is working in your Expo app, we'll replace the smoke test UI with a minimal chat interface:
- A message list with left/right bubbles
- A text input for composing messages
- Streaming updates into the latest assistant message bubble
Replace the contents of App.tsx with the following code:
import React, { useEffect, useMemo, useRef, useState } from "react";
import {
ActivityIndicator,
FlatList,
KeyboardAvoidingView,
Platform,
SafeAreaView,
StyleSheet,
Text,
TextInput,
View,
} from "react-native";
import {
completion,
downloadAsset,
LLAMA_3_2_1B_INST_Q4_0,
loadModel,
unloadModel,
VERBOSITY,
} from "@qvac/sdk";
// Basic chat message shape for the UI.
type Role = "user" | "assistant";
type ChatMessage = { id: string; role: Role; content: string };

function makeId() {
  // Timestamp plus a random hex suffix: cheap, collision-resistant keys
  // for FlatList items and message tracking.
  const stamp = Date.now();
  const suffix = Math.random().toString(16).slice(2);
  return `${stamp}-${suffix}`;
}
// Chat screen: initializes the QVAC model once on mount, then streams
// assistant replies token-by-token into the latest message bubble.
export default function App() {
  // Model lifecycle state.
  const [modelId, setModelId] = useState<string | null>(null);
  const [status, setStatus] = useState<string>("Initializing…");
  const [downloadPct, setDownloadPct] = useState<number | null>(null);

  // Chat UI state.
  const [input, setInput] = useState("");
  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [isGenerating, setIsGenerating] = useState(false);

  // Keep refs to the list and latest message array for async usage.
  const listRef = useRef<FlatList<ChatMessage>>(null);
  const messagesRef = useRef<ChatMessage[]>([]);
  // Mirror state into the ref so async handlers always read the latest value.
  messagesRef.current = messages;

  // FIX: hold the loaded model id in a ref for cleanup. The init effect has
  // an empty dep array, so its unmount cleanup closes over the *initial*
  // `modelId` state (null) and would otherwise never unload the model.
  const loadedModelIdRef = useRef<string | null>(null);

  // Enable send only when ready and input isn't empty.
  // NOTE(review): currently unused by the JSX below (there is no Send
  // button; submission goes through onSubmitEditing) — kept for a later step.
  const canSend = useMemo(() => {
    return !!modelId && !isGenerating && input.trim().length > 0;
  }, [modelId, isGenerating, input]);

  // Keep scrolled to bottom as messages grow.
  useEffect(() => {
    const t = setTimeout(() => {
      listRef.current?.scrollToEnd({ animated: true });
    }, 0);
    return () => clearTimeout(t);
  }, [messages]);

  useEffect(() => {
    // Initialize the model once on mount.
    let cancelled = false;
    (async () => {
      try {
        setStatus("Downloading model…");
        // Download the model file and update the progress UI.
        await downloadAsset({
          assetSrc: LLAMA_3_2_1B_INST_Q4_0,
          onProgress: (p: any) => {
            // Some SDKs emit ratio (0..1), others percent (0..100).
            const pct =
              typeof p?.progress === "number"
                ? p.progress <= 1
                  ? Math.round(p.progress * 100)
                  : Math.round(p.progress)
                : null;
            if (!cancelled) setDownloadPct(pct);
          },
        });
        if (cancelled) return;

        // Load the model into memory so we can run completions.
        setStatus("Loading model into memory…");
        const id = await loadModel({
          modelSrc: LLAMA_3_2_1B_INST_Q4_0,
          modelType: "llm",
          modelConfig: {
            device: "gpu", // switch to "cpu" if needed
            ctx_size: 2048,
            verbosity: VERBOSITY.ERROR,
          },
        });
        if (cancelled) {
          // FIX: unmounted while loading — free the model we just loaded
          // instead of leaking it.
          void unloadModel({ modelId: id, clearStorage: false }).catch(() => {});
          return;
        }
        loadedModelIdRef.current = id;
        setModelId(id);
        setStatus("Ready");
        setDownloadPct(null);
      } catch (e: any) {
        if (!cancelled) {
          setStatus(`Init failed: ${e?.message ?? String(e)}`);
        }
      }
    })();
    return () => {
      // Cleanup on unmount: stop updates and unload the model.
      cancelled = true;
      // React cleanup can’t be async directly, so we fire-and-forget the
      // unload (don’t clear the on-device cache by default).
      const id = loadedModelIdRef.current;
      if (id) {
        void unloadModel({ modelId: id, clearStorage: false }).catch(() => {});
      }
    };
    // Intentionally do NOT depend on modelId to avoid re-running init.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Send the current input: append a user message plus an empty assistant
  // placeholder, then stream tokens into that placeholder.
  async function handleSend() {
    // Guard against sending before the model is ready or while generating.
    if (!modelId || isGenerating) return;
    const trimmed = input.trim();
    if (!trimmed) return;
    setInput("");
    setIsGenerating(true);

    // Append user message and a placeholder assistant message for streaming.
    const userMsg: ChatMessage = { id: makeId(), role: "user", content: trimmed };
    const assistantId = makeId();
    const assistantMsg: ChatMessage = { id: assistantId, role: "assistant", content: "" };
    setMessages((prev) => [...prev, userMsg, assistantMsg]);

    try {
      // Build chat history for the completion request (read from the ref so
      // we get the messages as of this send, not a stale render).
      const history = [...messagesRef.current, userMsg].map((m) => ({
        role: m.role,
        content: m.content,
      }));

      // Run a streaming completion and update the last assistant bubble.
      const result = completion({
        modelId,
        history,
        stream: true,
      });
      let acc = "";
      for await (const token of result.tokenStream) {
        acc += token;
        // Update only the placeholder assistant message content.
        setMessages((prev) =>
          prev.map((m) => (m.id === assistantId ? { ...m, content: acc } : m))
        );
      }

      // Optional: stats (log only).
      try {
        const stats = await result.stats;
        console.log("📊 Completion stats:", stats);
      } catch {}
    } catch (e: any) {
      // Show any error in the assistant bubble.
      setMessages((prev) =>
        prev.map((m) =>
          m.id === assistantId
            ? { ...m, content: `❌ Error: ${e?.message ?? String(e)}` }
            : m
        )
      );
    } finally {
      setIsGenerating(false);
    }
  }

  return (
    <SafeAreaView style={styles.safe}>
      {/* Chat layout: header, message list, input row, and hint. */}
      <KeyboardAvoidingView
        style={styles.safe}
        behavior={Platform.OS === "ios" ? "padding" : undefined}
        keyboardVerticalOffset={Platform.OS === "ios" ? 8 : 0}
      >
        <View style={styles.header}>
          <Text style={styles.title}>QVAC Expo Chat</Text>
          <Text style={styles.subtitle}>
            {status}
            {downloadPct != null ? ` (${downloadPct}%)` : ""}
          </Text>
        </View>
        <View style={styles.chat}>
          <FlatList
            ref={listRef}
            data={messages}
            keyExtractor={(m) => m.id}
            renderItem={({ item }) => (
              <View
                style={[
                  styles.bubble,
                  item.role === "user" ? styles.bubbleUser : styles.bubbleAssistant,
                ]}
              >
                <Text style={styles.bubbleText}>{item.content}</Text>
              </View>
            )}
            contentContainerStyle={styles.chatContent}
          />
        </View>
        <View style={styles.inputRow}>
          <TextInput
            style={styles.input}
            value={input}
            onChangeText={setInput}
            placeholder={modelId ? "Type a message…" : "Loading model…"}
            editable={!!modelId && !isGenerating}
            returnKeyType="send"
            onSubmitEditing={handleSend}
            blurOnSubmit={false}
          />
          {isGenerating ? <ActivityIndicator /> : null}
        </View>
        <Text style={styles.hint}>
          Press “send/enter” to submit. Messages are streamed token-by-token.
        </Text>
      </KeyboardAvoidingView>
    </SafeAreaView>
  );
}
// Simple dark theme chat UI.
const styles = StyleSheet.create({
  safe: { flex: 1, backgroundColor: "#0B0B0F" },
  header: { paddingHorizontal: 16, paddingTop: 12, paddingBottom: 8 },
  title: { color: "white", fontSize: 18, fontWeight: "600" },
  subtitle: { color: "#A7A7B3", marginTop: 4 },
  chat: { flex: 1 },
  chatContent: { paddingHorizontal: 16, paddingVertical: 12, gap: 10 },
  bubble: {
    maxWidth: "85%",
    paddingHorizontal: 12,
    paddingVertical: 10,
    borderRadius: 14,
  },
  bubbleUser: {
    alignSelf: "flex-end",
    backgroundColor: "#2B2BFF",
  },
  bubbleAssistant: {
    alignSelf: "flex-start",
    backgroundColor: "#1A1A22",
  },
  bubbleText: { color: "white", lineHeight: 20 },
  inputRow: {
    paddingHorizontal: 16,
    paddingVertical: 10,
    borderTopWidth: StyleSheet.hairlineWidth,
    borderTopColor: "#2A2A33",
    flexDirection: "row",
    gap: 10,
    alignItems: "center",
  },
  input: {
    flex: 1,
    backgroundColor: "#121219",
    color: "white",
    paddingHorizontal: 12,
    paddingVertical: 10,
    borderRadius: 12,
  },
  hint: {
    paddingHorizontal: 16,
    paddingBottom: 12,
    color: "#7E7E8A",
    fontSize: 12,
  },
});

The assistant response is streamed token by token as the model generates text.
Task completed
Run the app again on your physical device:
npx expo run:ios --device
# or
npx expo run:android --device
On the first run, the model may download from peers (watch the terminal for progress). Once it finishes, type a message and press Enter/Send on the keyboard — the response should stream into the UI token by token: