#whatsapp extract info from caption

1 messages · Page 1 of 1 (latest)

knotty onyx
#

Extract the description too. Below is handler.ts:

/**
 * Context of an incoming message as seen by the handler.
 * Every field is optional, so consumers must handle missing values.
 */
type MsgCtx = {
// Sender identifier; cleanSenderPhone() strips a leading "whatsapp:" from it.
from?: string;
// Message text; presumably the image caption that gets parsed — TODO confirm.
body?: string;
// NOTE(review): unit/epoch not visible here — confirm before relying on it.
timestamp?: number;
channelId?: string;
// Forwarded as `sourceMessageId` when the product draft is created.
messageId?: string;
// Path of the attached media; its basename is sent as the image file name.
mediaPath?: string;
// Sent as the image `mimeType` when the product draft is created.
mediaType?: string;
};

/**
 * Turns a sender identifier into a bare phone string.
 *
 * A leading, lowercase "whatsapp:" scheme is dropped and surrounding
 * whitespace is trimmed. Missing or empty input yields "".
 *
 * @param value Sender identifier, possibly prefixed with "whatsapp:".
 * @returns The identifier without the scheme prefix, trimmed.
 */
function cleanSenderPhone(value: string | undefined): string {
  const sender = value ?? "";
  if (sender.length === 0) {
    return "";
  }

  // The prefix is only removed when it is the very first thing in the string.
  const withoutScheme = sender.replace(/^whatsapp:/, "");
  return withoutScheme.trim();
}

/**
 * Reports whether a message body carries no real caption text.
 *
 * A body counts as a placeholder when, after trimming, it is empty,
 * is exactly "[Image]", or begins with "<media:".
 *
 * @param body Raw message body, if any.
 * @returns `true` when there is nothing worth parsing in the body.
 */
function isPlaceholderBody(body: string | undefined): boolean {
  const trimmed = (body ?? "").trim();

  if (trimmed.length === 0) {
    return true;
  }
  if (trimmed === "[Image]") {
    return true;
  }
  return trimmed.startsWith("<media:");
}

/**
 * Trims a message body and removes a leading bracketed "[WhatsApp …]"
 * envelope prefix, if one is present.
 *
 * @param body Raw message body, if any.
 * @returns The caption text without the envelope prefix; "" when the body is
 *          missing or blank.
 */
function normalizeCaption(body: string | undefined): string {
  const text = (body ?? "").trim();
  if (!text) return "";

  // Match "[WhatsApp" up to the first closing bracket, plus any whitespace
  // after it. The brackets must be escaped: unescaped, they form a character
  // class and the prefix is never removed.
  return text.replace(/^\[WhatsApp[^\]]*\]\s*/i, "").trim();
}

/**
 * Parses a human-written amount such as "1,200" or "₹ 350" into a number.
 *
 * Commas, whitespace and the rupee sign are removed before conversion.
 *
 * @param value Text captured from a caption, if any.
 * @returns The parsed finite number, or `undefined` when the input is missing,
 *          contains no digits after cleanup, or is not a finite number.
 */
function parseNumber(value: string | undefined): number | undefined {
  if (!value) return undefined;

  const normalized = value.replace(/[,\s₹]/g, "");
  // Number("") is 0, so input made only of separators or the currency sign
  // must be rejected explicitly rather than reported as a zero amount.
  if (normalized === "") return undefined;

  const num = Number(normalized);
  return Number.isFinite(num) ? num : undefined;
}

function extractCaptionFields(body: string | undefined): {
name: string;
sellingPrice?: number;
purchasePrice?: number;
quantity?: number;
} {
const raw = normalizeCaption(body);

if (isPlaceholderBody(raw)) {
return { name: "Untitled product" };
}

const purchaseRegex =
/\b(?:pp|purchase(?:\sprice)?)\b\s[:=-]?\s*(?:rs.?|₹)?\s*([0-9][0-9,]*)\b/i;

const qtyRegex =
/\b(?:qty|quantity)\b\s*[:=-]?\s*([0-9][0-9,]*)\b/i;

const purchaseMatch = raw.match(purchaseRegex);
const purchasePrice = parseNumber(purchaseMatch?.[1]);

// Remove purchase fragment first so generic "price" does not capture it.
const withoutPurchase = raw.replace(
/\b(?:pp|purchase(?:\sprice)?)\b\s[:=-]?\s*(?:rs.?|₹)?\s*[0-9][0-9,]*\b/gi,
" ",
);

const sellingRegex =
/\b(?:sp|selling(?:\sprice)?|sale(?:\sprice)?|price)\b\s*[:=-]?\s*(?:rs.?|₹)?\s*([0-9][0-9,]*)\b/i;

const sellingMatch = withoutPurchase.match(sellingRegex);
const sellingPrice = parseNumber(sellingMatch?.[1]);

const qtyMatch = raw.match(qtyRegex);
const quantity = parseNumber(qtyMatch?.[1]);

ocean coralBOT
minor willowBOT
#

To help others find answers, you can mark your question as solved via Right click solution message -> Apps -> ✅ Mark Solution

ocean coralBOT
neon galeBOT
#

Yep, add description?: string to the return type, then derive it from either a description:/desc: field or the leftover caption text.

/**
 * Extracts product fields from an image caption.
 *
 * The caption is split into chunks on newlines, " | " and " • ". A chunk that
 * starts with "desc", "description" or "detail(s)" supplies the description.
 * Of the remaining chunks, the first becomes the name and the rest become the
 * fallback description. Price and quantity fragments ("pp 200", "sp: ₹350",
 * "qty 2", …) are parsed from the whole caption and removed from the name and
 * fallback description, even when they share a line with other text.
 *
 * @param body Raw message body, if any.
 * @returns The parsed fields; `name` falls back to "Untitled product".
 */
function extractCaptionFields(body: string | undefined): {
  name: string;
  description?: string;
  sellingPrice?: number;
  purchasePrice?: number;
  quantity?: number;
} {
  const raw = normalizeCaption(body);

  if (isPlaceholderBody(raw)) {
    return { name: "Untitled product" };
  }

  const purchaseRegex =
    /\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const sellingRegex =
    /\b(?:sp|selling(?:\s*price)?|sale(?:\s*price)?|price)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const qtyRegex =
    /\b(?:qty|quantity)\b\s*[:=\-]?\s*([0-9][0-9,]*)\b/i;

  // Global variants of the same patterns, used only for stripping fragments.
  const purchaseFragments = new RegExp(purchaseRegex.source, "gi");
  const sellingFragments = new RegExp(sellingRegex.source, "gi");
  const qtyFragments = new RegExp(qtyRegex.source, "gi");

  const purchasePrice = parseNumber(raw.match(purchaseRegex)?.[1]);

  // Remove purchase first so generic "price" does not capture it.
  const withoutPurchase = raw.replace(purchaseFragments, " ");

  const sellingPrice = parseNumber(
    withoutPurchase.match(sellingRegex)?.[1],
  );

  const quantity = parseNumber(raw.match(qtyRegex)?.[1]);

  const chunks = raw
    .split(/\r?\n|(?:\s+\|\s+)|(?:\s+•\s+)/)
    .map((part) => part.trim())
    .filter(Boolean);

  const isTaggedField = (text: string) =>
    /^(?:pp|purchase(?:\s*price)?|sp|selling(?:\s*price)?|sale(?:\s*price)?|price|qty|quantity)\b/i.test(
      text,
    );

  const isDescriptionChunk = (text: string) =>
    /^(?:desc|description|details?)\b/i.test(text);

  // Same order as above: purchase before selling, so "purchase price 200"
  // is not half-consumed by the generic "price" alternative.
  const stripFieldFragments = (text: string) =>
    text
      .replace(purchaseFragments, " ")
      .replace(sellingFragments, " ")
      .replace(qtyFragments, " ")
      .replace(/\s{2,}/g, " ")
      .trim();

  const descriptionChunk = chunks.find(isDescriptionChunk);

  const explicitDescription = descriptionChunk
    ?.replace(/^(?:desc|description|details?)\b\s*[:=\-]?\s*/i, "")
    .trim();

  // Strip inline fragments so "Blue Shirt pp 200" yields the name
  // "Blue Shirt"; chunks that end up empty or still start with a field label
  // (e.g. "price on request") are dropped.
  const contentChunks = chunks
    .filter((text) => !isDescriptionChunk(text))
    .map(stripFieldFragments)
    .filter((text) => text !== "" && !isTaggedField(text));

  const name = contentChunks[0] || "Untitled product";
  const description =
    explicitDescription || contentChunks.slice(1).join(" ").trim() || undefined;

  return {
    name,
    description,
    sellingPrice,
    purchasePrice,
    quantity,
  };
}
#

This supports:

  • Description: ...
  • Desc: ...
  • Details: ...

And if there is no explicit label, it uses the remaining text after the first chunk as description.

If you want, I can also make it work for comma-separated captions like:
Shirt, blue cotton, pp 200, sp 350, qty 2

knotty onyx
#

here is what I have

/**
 * Extracts name, prices and quantity from an image caption.
 *
 * Price and quantity fragments ("pp 200", "sp: ₹350", "qty 2", …) are parsed
 * and then removed; whatever text is left becomes the name. `description` is
 * part of the return type but is not populated by this version, so any
 * description text in the caption ends up inside `name`.
 *
 * @param body Raw message body, if any.
 * @returns The parsed fields; `name` falls back to "Untitled product".
 */
function extractCaptionFields(body: string | undefined): {
  name: string;
  description?: string;
  sellingPrice?: number;
  purchasePrice?: number;
  quantity?: number;
} {
  const raw = normalizeCaption(body);

  if (isPlaceholderBody(raw)) {
    return { name: "Untitled product" };
  }

  // The patterns need their `*` quantifiers and the escaped dot in `rs\.?`:
  // without them only a single space is tolerated after a label, "rs" may be
  // followed by any character, and numbers longer than two characters fail.
  const purchaseRegex =
    /\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const qtyRegex =
    /\b(?:qty|quantity)\b\s*[:=\-]?\s*([0-9][0-9,]*)\b/i;

  const purchaseMatch = raw.match(purchaseRegex);
  const purchasePrice = parseNumber(purchaseMatch?.[1]);

  // Remove purchase fragment first so generic "price" does not capture it.
  const withoutPurchase = raw.replace(
    /\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*[0-9][0-9,]*\b/gi,
    " ",
  );

  const sellingRegex =
    /\b(?:sp|selling(?:\s*price)?|sale(?:\s*price)?|price)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const sellingMatch = withoutPurchase.match(sellingRegex);
  const sellingPrice = parseNumber(sellingMatch?.[1]);

  const qtyMatch = raw.match(qtyRegex);
  const quantity = parseNumber(qtyMatch?.[1]);

  // Each regex literal must stay on one line; a literal cannot contain a
  // line break.
  const cleanedName = raw
    .replace(/\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*[0-9][0-9,]*\b/gi, " ")
    .replace(/\b(?:sp|selling(?:\s*price)?|sale(?:\s*price)?|price)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*[0-9][0-9,]*\b/gi, " ")
    .replace(/\b(?:qty|quantity)\b\s*[:=\-]?\s*[0-9][0-9,]*\b/gi, " ")
    .replace(/[|,;]+/g, " ")
    .replace(/\s{2,}/g, " ")
    .trim();

  return {
    name: cleanedName || "Untitled product",
    ...(sellingPrice !== undefined ? { sellingPrice } : {}),
    ...(purchasePrice !== undefined ? { purchasePrice } : {}),
    ...(quantity !== undefined ? { quantity } : {}),
  };
}

neon galeBOT
#

You’re close, the main thing is: right now cleanedName still contains both name + description.

Use the leftover text, then split it into:

  • first chunk = name
  • remaining chunks = description
/**
 * Extracts name, description, prices and quantity from an image caption.
 *
 * Price and quantity fragments ("pp 200", "sp: ₹350", "qty 2", …) are parsed
 * and removed. The leftover text is split on "|", ";" and newlines: the first
 * non-empty part is the name, the remaining parts joined with spaces are the
 * description.
 *
 * @param body Raw message body, if any.
 * @returns The parsed fields; optional fields are omitted when not found and
 *          `name` falls back to "Untitled product".
 */
function extractCaptionFields(body: string | undefined): {
  name: string;
  description?: string;
  sellingPrice?: number;
  purchasePrice?: number;
  quantity?: number;
} {
  const raw = normalizeCaption(body);

  if (isPlaceholderBody(raw)) {
    return { name: "Untitled product" };
  }

  const purchaseRegex =
    /\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const sellingRegex =
    /\b(?:sp|selling(?:\s*price)?|sale(?:\s*price)?|price)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*([0-9][0-9,]*)\b/i;

  const qtyRegex =
    /\b(?:qty|quantity)\b\s*[:=\-]?\s*([0-9][0-9,]*)\b/i;

  // Global variants of the same patterns, used only for stripping fragments.
  const purchaseFragments = new RegExp(purchaseRegex.source, "gi");
  const sellingFragments = new RegExp(sellingRegex.source, "gi");
  const qtyFragments = new RegExp(qtyRegex.source, "gi");

  const purchasePrice = parseNumber(raw.match(purchaseRegex)?.[1]);

  // Remove purchase first so generic "price" does not capture it.
  const withoutPurchase = raw.replace(purchaseFragments, " ");
  const sellingPrice = parseNumber(withoutPurchase.match(sellingRegex)?.[1]);

  const quantity = parseNumber(raw.match(qtyRegex)?.[1]);

  // Split on the separators BEFORE collapsing whitespace. Collapsing first
  // would turn " \n " into a single space and merge name and description.
  const parts = raw
    .replace(purchaseFragments, " ")
    .replace(sellingFragments, " ")
    .replace(qtyFragments, " ")
    .split(/[|;\r\n]+/)
    .map((part) => part.replace(/\s{2,}/g, " ").trim())
    .filter(Boolean);

  const name = parts[0] || "Untitled product";
  const description = parts.slice(1).join(" ").trim() || undefined;

  return {
    name,
    ...(description ? { description } : {}),
    ...(sellingPrice !== undefined ? { sellingPrice } : {}),
    ...(purchasePrice !== undefined ? { purchasePrice } : {}),
    ...(quantity !== undefined ? { quantity } : {}),
  };
}

Example:

"Blue Shirt | Soft cotton fabric | pp 200 | sp 350 | qty 2"

Result:

#
{
  name: "Blue Shirt",
  description: "Soft cotton fabric",
  purchasePrice: 200,
  sellingPrice: 350,
  quantity: 2
}

One limitation: if name + description are in a single plain sentence with no separator/newline, you can’t reliably split them. If that’s your format, send one sample caption and I’ll tune the parser for it.

knotty onyx
#

And how do I pass it in here?

// Send the parsed caption fields and the attached image to the
// "create_product_draft" tool. The client is closed afterwards whether or not
// the call succeeds.
try {
  const draftArguments = {
    storeId: 1,
    sourceMessageId: ctx.messageId ?? "",
    senderPhone: cleanSenderPhone(ctx.from),
    receivedAt: event.timestamp.toISOString(),
    name: parsed.name,
    // Optional fields are left out entirely when the caption did not supply them.
    ...(parsed.sellingPrice === undefined
      ? {}
      : { sellingPrice: parsed.sellingPrice }),
    ...(parsed.purchasePrice === undefined
      ? {}
      : { purchasePrice: parsed.purchasePrice }),
    ...(parsed.quantity === undefined ? {} : { quantity: parsed.quantity }),
    images: [
      {
        base64,
        mimeType: ctx.mediaType,
        fileName: path.basename(ctx.mediaPath),
        sortOrder: 0,
        primary: true,
        approved: true,
      },
    ],
  };

  await client.callTool({
    name: "create_product_draft",
    arguments: draftArguments,
  });
} finally {
  await client.close();
}

neon galeBOT
#

Just pass it exactly like the other optional fields:

// Send the parsed caption fields, now including the optional description, and
// the attached image to the "create_product_draft" tool. The client is closed
// afterwards whether or not the call succeeds.
try {
  await client.callTool({
    name: "create_product_draft",
    arguments: {
      storeId: 1,
      sourceMessageId: ctx.messageId ?? "",
      senderPhone: cleanSenderPhone(ctx.from),
      receivedAt: event.timestamp.toISOString(),
      name: parsed.name,
      // An empty or missing description is omitted rather than sent as "".
      ...(parsed.description ? { description: parsed.description } : {}),
      ...(parsed.sellingPrice !== undefined
        ? { sellingPrice: parsed.sellingPrice }
        : {}),
      ...(parsed.purchasePrice !== undefined
        ? { purchasePrice: parsed.purchasePrice }
        : {}),
      ...(parsed.quantity !== undefined ? { quantity: parsed.quantity } : {}),
      images: [
        {
          base64,
          mimeType: ctx.mediaType,
          fileName: path.basename(ctx.mediaPath),
          sortOrder: 0,
          primary: true,
          approved: true,
        },
      ],
    },
  });
} finally {
  await client.close();
}

If you always want the field present, even when empty:

description: parsed.description ?? "",

One important catch: create_product_draft must actually accept a description argument in its schema. If not, add it there too, for example:

#
description: z.string().optional(),

So the full flow is:

  1. extract description in extractCaptionFields
  2. include it in parsed
  3. pass it into client.callTool(...)
  4. make sure the tool input schema accepts it

If you want, paste the create_product_draft schema and I’ll show the exact change.

knotty onyx
#

We only need to extract the description from the caption if "Description" or "desc" is present.

neon galeBOT
#

Yep, then keep it explicit and only extract when the caption contains Description or desc.

Use this:

// Matches an explicit "description"/"desc" label at the start of the caption
// or right after a newline, "|" or ";", and captures the text up to the next
// separator.
// - `\b` after the label keeps words such as "descending" from matching.
// - `[^\S\n]*` allows spaces/tabs but never crosses a newline, so an empty
//   "desc:" line cannot swallow the following line (e.g. "pp 200").
const descriptionRegex =
  /(?:^|[\n|;])[^\S\n]*(?:description|desc)\b[^\S\n]*[:=\-]?[^\S\n]*([^\n|;]+)/i;

const descriptionMatch = raw.match(descriptionRegex);
// A whitespace-only capture is treated the same as no description.
const description = descriptionMatch?.[1]?.trim() || undefined;

Then remove it from cleanedName too:

// Build the product name from whatever is left of the caption after the
// labelled description and the price/quantity fragments are removed.
// Separators (| , ;) become spaces and runs of whitespace are collapsed.
const cleanedName = raw
  .replace(descriptionRegex, " ")
  .replace(/\b(?:pp|purchase(?:\s*price)?)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*[0-9][0-9,]*\b/gi, " ")
  .replace(/\b(?:sp|selling(?:\s*price)?|sale(?:\s*price)?|price)\b\s*[:=\-]?\s*(?:rs\.?|₹)?\s*[0-9][0-9,]*\b/gi, " ")
  .replace(/\b(?:qty|quantity)\b\s*[:=\-]?\s*[0-9][0-9,]*\b/gi, " ")
  .replace(/[|,;]+/g, " ")
  .replace(/\s{2,}/g, " ")
  .trim();

And return it:

return {
  name: cleanedName || "Untitled product",
  ...(description ? { description } : {}),