# Prompt template with three positional slots, in order: system message,
# user message, assistant reply (left empty when generating).
# NOTE(review): this layout (leading newline, single newline after each
# header) differs from Meta's reference Llama 3 format, which puts a blank
# line after each header. It is left untouched because it presumably matches
# the format the model was fine-tuned on -- confirm before changing it.
alpaca_prompt = """
<|start_header_id|>system<|end_header_id|>
{}
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
{}"""

# Remember the tokenizer's stock EOS id before overriding it, so generation
# can keep stopping on it in addition to <|eot_id|>.
default_eos_token_id = tokenizer.eos_token_id

# Because we operate mostly off of completions, we need this extra token:
# each turn in the template above is terminated by <|eot_id|>.
tokenizer.eos_token = '<|eot_id|>'
# Not used in this section; kept as a module-level name in case code outside
# this view appends it to training text.
EOS_TOKEN = tokenizer.eos_token
print("Model loaded")

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Fill the template with an empty system message, the user question, and an
# empty assistant slot so the model writes the reply.
prompt_text = alpaca_prompt.format(
    "",  # system
    "Describe yourself in 1 sentence.",  # user
    "",  # output - leave this blank for generation!
)
inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")

# Streams decoded text to stdout as tokens are produced.
text_streamer = TextStreamer(tokenizer)

# Overriding tokenizer.eos_token does not by itself change when generate()
# stops: stop ids come from the model's generation config unless passed
# explicitly. Pass <|eot_id|> (plus the stock EOS id, if different) so the
# reply ends at the end of the assistant turn instead of running on until
# max_new_tokens.
stop_token_ids = [tokenizer.eos_token_id]
if default_eos_token_id is not None and default_eos_token_id not in stop_token_ids:
    stop_token_ids.append(default_eos_token_id)

_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    eos_token_id=stop_token_ids,
)