#Share your Codex configs

1 messages · Page 1 of 1 (latest)

stable osprey
#
# YOLO
# sandbox_mode = "danger-full-access"
# network_access = true  # boolean, not a string; NOTE: Codex documents this key under [sandbox_workspace_write] - verify

[model_providers.zai]
name = "zai"
# Name of the environment variable that must hold the API key.
env_key = "Z_AI_API_KEY"
base_url = "https://api.z.ai/api/coding/paas/v4"

# Select with: codex --profile glm4-6-high-detailed
[profiles.glm4-6-high-detailed]
model_provider = "zai"
model = "glm-4.6"
model_supports_reasoning_summaries = true
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"
# YOLO (left disabled; uncomment to enable)
# approval_policy = "never"

#
# Ollama Cloud, reached directly over HTTPS.
# base_url targets the OpenAI-compatible `/v1` path, the same path the
# local Ollama provider uses (`http://localhost:11434/v1`).
[model_providers.ollama_cloud]
name = "Ollama Cloud"
base_url = "https://ollama.com/v1"
# Name of the environment variable that must hold the Ollama API key.
env_key = "OLLAMA_API_KEY"


# Select with: codex --profile gpt-oss-120b_cloud
[profiles.gpt-oss-120b_cloud]
model_provider = "ollama_cloud"
model = "gpt-oss:120b"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"

# Select with: codex --profile qwen3-coder-480b_cloud
[profiles.qwen3-coder-480b_cloud]
# The tag is 480b (as in the profile name), not 408b.
model = "qwen3-coder:480b"
model_provider = "ollama_cloud"
model_reasoning_effort = "high"
model_reasoning_summary = "detailed"

# Select with: codex --profile kimi-k2-1t_cloud
[profiles.kimi-k2-1t_cloud]
model_provider = "ollama_cloud"
model = "kimi-k2:1t"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"

# Select with: codex --profile deepseek-v3-1-671b_cloud
[profiles.deepseek-v3-1-671b_cloud]
model_provider = "ollama_cloud"
model = "deepseek-v3.1:671b"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"
#

Run `codex --profile gpt-oss-120b_cloud` to start Codex with one of these profiles.

Set the environment variable you named in env_key to the target API's key. I use direnv to manage environment variables; use whatever works for you.

You could put them in your bashrc, or write a script with your key in it.

For Ollama, for example, you would set

export OLLAMA_API_KEY=key

in .bashrc or .envrc

The simplest possible way would be

OLLAMA_API_KEY=<key> codex --profile gpt-oss-120b_cloud

or

export OLLAMA_API_KEY=<key> 
codex --profile gpt-oss-120b_cloud # do some work, close it
codex # Back to using the codex model tied to your plus/pro/business account

to test that your configs are right.

#

Openrouter.ai has some free models.

[model_providers.openrouter]
name = "openrouter"
# Name of the environment variable that must hold the OpenRouter API key.
env_key = "OPEN_ROUTER_KEY"
base_url = "https://openrouter.ai/api/v1"

# Select with: codex --profile openrouter-gpt-oss-20b
[profiles.openrouter-gpt-oss-20b]
model_provider = "openrouter"
model = "openai/gpt-oss-20b:free"
model_supports_reasoning_summaries = true
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"
approval_policy = "never"
#

You can also use the cloud models from a local instance of Ollama. This is easier if you already have it running: you just run `ollama signin`.

# Local Ollama instance; no env_key is configured for this provider.
[model_providers.ollama]
base_url = "http://localhost:11434/v1"
name = "Ollama Local"

# Select with: codex --profile gpt-oss-120b-cloud-ollama-local
[profiles.gpt-oss-120b-cloud-ollama-local]
model_provider = "ollama"
model = "gpt-oss:120b-cloud"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"

# Select with: codex --profile qwen3-coder-480b-cloud-ollama-local
[profiles.qwen3-coder-480b-cloud-ollama-local]
# The tag is 480b (as in the profile name), not 408b.
model = "qwen3-coder:480b-cloud"
model_provider = "ollama"
model_reasoning_effort = "high"
model_reasoning_summary = "detailed"

# Select with: codex --profile kimi-k2-1t-cloud-ollama-local
[profiles.kimi-k2-1t-cloud-ollama-local]
model_provider = "ollama"
model = "kimi-k2:1t-cloud"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"

# Select with: codex --profile deepseek-v3-1-671b-cloud-ollama-local
[profiles.deepseek-v3-1-671b-cloud-ollama-local]
model_provider = "ollama"
model = "deepseek-v3.1:671b-cloud"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"
#

# Select with: codex --profile qwen3-8b
[profiles.qwen3-8b]
model_provider = "ollama"
model = "qwen3:8b"
model_context_window = 32_000
model_supports_reasoning_summaries = true
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"
approval_policy = "never"

# Experiment: smaller versions of Qwen with larger context sizes, to see
# whether more context is a better use of memory than a bigger model.
# This one is based on `qwen3:4b-thinking` with a context size of 100k.
# The model's limit is 256k, but that didn't fit on my GPU, and you usually
# get degraded performance when pushing a model to its maximum advertised
# limit.
[profiles.qwen3-codex]
model_provider = "ollama"
model = "error/qwen3-codex"
model_context_window = 100_000
model_reasoning_effort = "low"
approval_policy = "never"

# This "runs" on my 4070 Ti mobile with 8 GB of RAM, but it is slower.
[profiles.qwen3-14b]
model_provider = "ollama"
model = "qwen3:14b"
model_reasoning_summary = "detailed"
model_reasoning_effort = "high"

# These models didn't work well with Codex: the system prompt alone is
# enough to use up their entire context. They might work with a simpler
# coding tool.
[profiles.qwen2-5-7b]
model_provider = "ollama"
model = "qwen2.5:7b"

# Select with: codex --profile qwen2-5-7b-coder
[profiles.qwen2-5-7b-coder]
model_provider = "ollama"
model = "qwen2.5-coder:7b"

# Select with: codex --profile qwen2-5-codex
[profiles.qwen2-5-codex]
model_provider = "ollama"
model = "error/qwen2.5-codex"
model_context_window = 100_000
approval_policy = "never"

# This model calls a single tool every turn. It can technically code, but
# it will be slow. It is still useful if you are deliberate with it.
[profiles.llama3-1-codex]
model_provider = "ollama"
model = "error/llama3.1-codex"
model_context_window = 128_000
approval_policy = "never"
#
# Serena MCP server. Helps reduce the number of files the agent reads and
# makes it take more deliberate action. More precise code searches reduce
# the number of tokens required for a job.
[mcp_servers.serena]
command = "uvx"
args = [
    "--from",
    "git+https://github.com/oraios/serena",
    "serena",
    "start-mcp-server",
]
# Author's own project directory; point this at your own checkout.
cwd = "/home/err/devel/promethean"