fix: Dense context 65K→98K, parallel restored to 2
This commit is contained in:
+1
-1
@@ -27,7 +27,7 @@ GPU_MAX_CONCURRENT = {
|
|||||||
# Context window sizes (tokens) — used for compaction signals
|
# Context window sizes (tokens) — used for compaction signals
|
||||||
GPU_CONTEXT = {
|
GPU_CONTEXT = {
|
||||||
"qwen3.6-35B-A3B": 131072,
|
"qwen3.6-35B-A3B": 131072,
|
||||||
"qwen3.6-27B-code": 65536,
|
"qwen3.6-27B-code": 98304,
|
||||||
"qwen3.5-9b-vlm": 131072,
|
"qwen3.5-9b-vlm": 131072,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user