<?xml version="1.0" encoding="UTF-8"?>
<!-- Atom 1.0 feed: revision history of the wiki page "Ollama_Systemd_Unit_File_Sample". -->
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<!-- Feed identifier; same URL as the rel="self" link below. -->
	<id>https://coolscript.net/index.php?action=history&amp;feed=atom&amp;title=Ollama_Systemd_Unit_File_Sample</id>
	<title>Ollama Systemd Unit File Sample - Revision history</title>
	<!-- rel="self" is this feed document; rel="alternate" is the HTML history view of the same page. -->
	<link rel="self" type="application/atom+xml" href="https://coolscript.net/index.php?action=history&amp;feed=atom&amp;title=Ollama_Systemd_Unit_File_Sample"/>
	<link rel="alternate" type="text/html" href="https://coolscript.net/index.php?title=Ollama_Systemd_Unit_File_Sample&amp;action=history"/>
	<!-- Feed-level timestamp in UTC ("Z" suffix). -->
	<updated>2026-06-02T16:17:37Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<!-- Software that produced this feed. -->
	<generator>MediaWiki 1.40.1</generator>
	<!-- One history entry: the id and alternate link both point at the diff view (diff=1147, oldid=prev). -->
	<entry>
		<id>https://coolscript.net/index.php?title=Ollama_Systemd_Unit_File_Sample&amp;diff=1147&amp;oldid=prev</id>
		<!-- No type attribute, so Atom treats this title as plain text: the escaped markup
		     (code and br tags, quotes) is displayed literally. The quoted page text is cut
		     off at "StartLimitInter...".
		     NOTE(review): the quoted unit-file lines carry trailing "# ..." annotations on the
		     directive line itself; systemd.syntax(7) only describes comment lines that start
		     with "#" or ";", so these lines are presumably annotated for reading rather than
		     for pasting as-is. Confirm against the wiki page. -->
		<title>Admin: Created page with &quot;== Ollama systemd override (annotated) ==  === Resource limits === &lt;code&gt; MemoryMax=16G # hard RAM cap (kills process if exceeded)&lt;br&gt; MemoryHigh=14G # soft limit (throttling before hard cap)&lt;br&gt; CPUQuota=400% # limit to ~4 CPU cores &lt;/code&gt;  === File descriptors === &lt;code&gt; LimitNOFILE=1048576 # prevent &quot;too many open files&quot; &lt;/code&gt;  === Restart / stability === &lt;code&gt; Restart=on-failure # restart only on crashes&lt;br&gt; RestartSec=3 # delay before restart&lt;br&gt; StartLimitInter...&quot;</title>
		<link rel="alternate" type="text/html" href="https://coolscript.net/index.php?title=Ollama_Systemd_Unit_File_Sample&amp;diff=1147&amp;oldid=prev"/>
		<!-- Timestamp of this entry in UTC ("Z" suffix). -->
		<updated>2026-04-30T16:29:13Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;== Ollama systemd override (annotated) ==  === Resource limits === &amp;lt;code&amp;gt; MemoryMax=16G # hard RAM cap (kills process if exceeded)&amp;lt;br&amp;gt; MemoryHigh=14G # soft limit (throttling before hard cap)&amp;lt;br&amp;gt; CPUQuota=400% # limit to ~4 CPU cores &amp;lt;/code&amp;gt;  === File descriptors === &amp;lt;code&amp;gt; LimitNOFILE=1048576 # prevent &amp;quot;too many open files&amp;quot; &amp;lt;/code&amp;gt;  === Restart / stability === &amp;lt;code&amp;gt; Restart=on-failure # restart only on crashes&amp;lt;br&amp;gt; RestartSec=3 # delay before restart&amp;lt;br&amp;gt; StartLimitInter...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;== Ollama systemd override (annotated) ==&lt;br /&gt;
&lt;br /&gt;
=== Resource limits ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
MemoryMax=16G # hard RAM cap (kills process if exceeded)&amp;lt;br&amp;gt;&lt;br /&gt;
MemoryHigh=14G # soft limit (throttling before hard cap)&amp;lt;br&amp;gt;&lt;br /&gt;
CPUQuota=400% # limit to ~4 CPU cores&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== File descriptors ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
LimitNOFILE=1048576 # prevent &amp;quot;too many open files&amp;quot;&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Restart / stability ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
Restart=on-failure # restart only on crashes&amp;lt;br&amp;gt;&lt;br /&gt;
RestartSec=3 # delay before restart&amp;lt;br&amp;gt;&lt;br /&gt;
StartLimitIntervalSec=60&amp;lt;br&amp;gt;&lt;br /&gt;
StartLimitBurst=5 # avoid infinite restart loops&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== IO / disk behavior ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
IOSchedulingClass=best-effort&amp;lt;br&amp;gt;&lt;br /&gt;
IOSchedulingPriority=4&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Optional CPU pinning ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
CPUAffinity=0 1 2 3 # bind to specific cores&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Security (light sandboxing) ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
NoNewPrivileges=true&amp;lt;br&amp;gt;&lt;br /&gt;
PrivateTmp=true&amp;lt;br&amp;gt;&lt;br /&gt;
ProtectSystem=full&amp;lt;br&amp;gt;&lt;br /&gt;
ProtectHome=true&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Logging ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
StandardOutput=journal&amp;lt;br&amp;gt;&lt;br /&gt;
StandardError=journal&amp;lt;br&amp;gt;&lt;br /&gt;
LogRateLimitIntervalSec=0&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Ollama tuning ===&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
Environment=&amp;quot;OLLAMA_NUM_PARALLEL=2&amp;quot; # concurrent requests&amp;lt;br&amp;gt;&lt;br /&gt;
Environment=&amp;quot;OLLAMA_MAX_LOADED_MODELS=1&amp;quot; # avoid VRAM exhaustion&amp;lt;br&amp;gt;&lt;br /&gt;
Environment=&amp;quot;OLLAMA_KEEP_ALIVE=5m&amp;quot; # unload after idle&amp;lt;br&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Environment=&amp;quot;OLLAMA_HOST=127.0.0.1&amp;quot; # bind local only&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== Minimal working example ==&lt;br /&gt;
&amp;lt;code&amp;gt;&lt;br /&gt;
[Service]&amp;lt;br&amp;gt;&lt;br /&gt;
MemoryMax=16G&amp;lt;br&amp;gt;&lt;br /&gt;
CPUQuota=400%&amp;lt;br&amp;gt;&lt;br /&gt;
LimitNOFILE=1048576&amp;lt;br&amp;gt;&lt;br /&gt;
Restart=on-failure&amp;lt;br&amp;gt;&lt;br /&gt;
RestartSec=3&amp;lt;br&amp;gt;&lt;br /&gt;
Environment=&amp;quot;OLLAMA_NUM_PARALLEL=2&amp;quot;&amp;lt;br&amp;gt;&lt;br /&gt;
Environment=&amp;quot;OLLAMA_MAX_LOADED_MODELS=1&amp;quot;&lt;br /&gt;
&amp;lt;/code&amp;gt;&lt;/div&gt;</summary>
		<!-- Author of this entry; matches the "Admin:" prefix of the entry title. -->
		<author><name>Admin</name></author>
	</entry>
</feed>