# Load model if not cached
cache_key = f"{experiment_name}:{checkpoint}"
if cache_key not in _model_cache:
model_path = Path("/checkpoints") / "experiments" / experiment_name / checkpoint
if not model_path.exists():
raise ValueError(f"Model not found at {model_path}")
print(f"Loading fine-tuned model: {model_path}")
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=str(model_path),
max_seq_length=8192,
dtype=None,
load_in_4bit=True,
)
tokenizer = get_chat_template(tokenizer, chat_template="qwen3")
FastLanguageModel.for_inference(model)
_model_cache[cache_key] = (model, tokenizer)
print(f"Fine-tuned model cached: {experiment_name}:{checkpoint}")
model, tokenizer = _model_cache[cache_key]
messages = [
{"from": "system", "value": system_prompt},
{"from": "human", "value": user_message},
]
inputs = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to("cuda")
with torch.inference_mode():
outputs = model.generate(
input_ids=inputs,
max_new_tokens=max_new_tokens,
temperature=0.7,
top_p=0.9,
use_cache=True,
pad_token_id=tokenizer.eos_token_id,
)
generated_text = tokenizer.decode(
outputs[0][inputs.shape[1] :], skip_special_tokens=True
)
return generated_text
jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' — what's wrong?