cell 1
`%%capture
import os, re
!pip install --force-reinstall unsloth
!pip install --force-reinstall unsloth_zoo
!pip install transformers==4.57.1
!pip install --no-deps trl==0.22.2
!pip install jiwer`
cell 2
`from datasets import load_dataset
from huggingface_hub import login
Login to Hugging Face (if private dataset)
login() # Enter your token
`
cell 3
`from datasets import load_dataset
dataset = load_dataset("avishadilhara/sinhala-ocr-lk-acts-1010")
test_dataset = dataset["test"]
print(f"\nTest set: {len(test_dataset)} samples")
Preview
sample = test_dataset[0]
print(f" Image size: {sample['image'].size}")
print(f" Text length: {len(sample['text'])} chars")
print(f" Preview: {sample['text'][:100]}...")
`
cell 4
`import jiwer
from unsloth import FastVisionModel
model, tokenizer = FastVisionModel.from_pretrained(
model_name="/kaggle/input/models/avishauow/qwen-vl-3/pytorch/r16-bs4-ga1-lr2e-4-rtx3090gpu/1/sinhala_qwen3vl_ocr_lora",
load_in_4bit=True,
)
FastVisionModel.for_inference(model)
`
cell 5
`
# Take the first test example: its image and the matching reference text.
first_example = dataset['test'][0]
image = first_example['image']
ref_text = first_example['text']  # ground truth reference

test_instruction = "Perform OCR on this image and extract all the text exactly as it appears."

# A single user turn: an image placeholder followed by the instruction text.
user_content = [
    {"type": "image"},
    {"type": "text", "text": test_instruction},
]
messages = [{"role": "user", "content": user_content}]

# Render the chat template to a prompt string (tokenize=False) with the
# generation prompt appended.
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(type(input_text), input_text[:200])

# Encode the image and the prompt together as PyTorch tensors and move them to
# the GPU. add_special_tokens=False is passed alongside the already-templated
# prompt.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,
    return_tensors="pt",
).to("cuda")
`