Enhancement › Image metadata extraction pipeline

Image metadata extraction pipeline

When working with images in your application, you often need to automatically extract and structure metadata such as colors, subjects, and resolution. The example below builds an enhancement pipeline that does exactly that.

import { BooleanField, NumberField, StringField, Unbody, IImageBlock } from "unbody";
import {
  AutoSummary,
  AutoVision,
  CustomSchema,
  Enhancement,
  Generative,
  ImageVectorizer, 
  PdfParser,
  ProjectSettings,
  TextVectorizer,
  UnbodyAdmin,
} from "unbody/admin";
 
// Admin API credentials (ID + secret pair) — replace the placeholders
// with your real values.
const adminAuth = {
    username: "93156549-...", // ID
    password: "ak_7660b....", // Secret
};

// Admin client used to create and configure projects.
const admin = new UnbodyAdmin({ auth: adminAuth });
 
// 1. Enhancement pipeline named "enrich_image", bound to the built-in
//    ImageBlock collection — every ingested image runs through its steps.
const imageEnhancementPipeline = new Enhancement.Pipeline("enrich_image", "ImageBlock");
 
// 2. Extraction step: asks openai-gpt-4o for structured metadata about each
//    image. The prompt is assembled from the record's own fields, the zod
//    schema constrains the model's response, and the `output` map writes the
//    parsed result back onto the record.
const extractImageInfo = new Enhancement.Step(
  "extract_image_info",
  new Enhancement.Action.StructuredGenerator({
    model: "openai-gpt-4o",

    // Prompt built from the image record's url, caption, and dimensions.
    prompt: (context) => `
      Analyze this image and extract colors, subjects, resolution:
      Image Url: ${context.record.url || "None"}
      Caption: ${context.record.caption || "No caption"}
      Width: ${context.record.width}
      Height: ${context.record.height}
      size in bytes: ${context.record.size}
    `,

    // Response shape enforced with a zod schema.
    schema: (_context, { z }) =>
      z.object({
        colors: z.array(z.string()).describe("Main colors in the image"),
        subjects: z.array(z.string()).describe("Main subjects or objects"),
        resolution: z.string().describe("resolution of the image"),
      }),
  }),
  {
    output: {
      // Array results are serialized to JSON strings so they fit Text fields.
      colors: (context) => JSON.stringify(context.result.json.colors),
      subjects: (context) => JSON.stringify(context.result.json.subjects),
      resolution: (context) => context.result.json.resolution,
    },
  }
);
 
// 3. Register the extraction step on the pipeline.
imageEnhancementPipeline.add(extractImageInfo);

// 4. Wrap the pipeline in an Enhancement settings object so it can be
//    attached to the project settings below.
const enhancement = new Enhancement();
enhancement.add(imageEnhancementPipeline);
 
// Project configuration: vectorizers, generative/vision models, PDF parsing,
// and the image enhancement pipeline defined above.
const projectSettings = new ProjectSettings();
projectSettings.set(new TextVectorizer(TextVectorizer.OpenAI.TextEmbedding3Small));
projectSettings.set(new ImageVectorizer(ImageVectorizer.Img2VecNeural.Default));
projectSettings.set(new Generative(Generative.OpenAI.GPT4o));
projectSettings.set(new AutoSummary(AutoSummary.OpenAI.GPT4o));
projectSettings.set(new AutoVision(AutoVision.OpenAI.GPT4o));
projectSettings.set(new PdfParser(PdfParser.NlmSherpa.Default));
projectSettings.set(enhancement);
 
// 5. Create the project, then extend the built-in ImageBlock collection with
//    the three fields the enhancement step writes, and persist everything.
const project = admin.projects.ref({
    name: "Image Analysis Project",
    settings: projectSettings,
});

// Custom Text fields matching the enhancement step's `output` keys.
const imageBlockExtension = new CustomSchema.Collection("ImageBlock")
    .add(new CustomSchema.Field.Text("colors", "Main colors detected in the image"))
    .add(new CustomSchema.Field.Text("subjects", "Main subjects or objects detected"))
    .add(new CustomSchema.Field.Text("resolution", "Resolution information"));

project.settings.set(new CustomSchema().extend(imageBlockExtension));

await project.save();
 
// Content client for querying the project's data (separate credentials from
// the admin client above).
const unbody = new Unbody({
    projectId: "22e0b...", // Project ID
    apiKey: "pk_.....", // Project Api Key
});
 
// 6. Enhanced ImageBlock records now carry the custom fields; extend the
//    SDK's IImageBlock so the query below is fully typed.
interface ExtendedImageBlock extends IImageBlock {
  colors: StringField;
  subjects: StringField;
  resolution: StringField;
}

// Fetch the original file name plus the three enhancement outputs.
const imageQuery = unbody.get
  .collection<ExtendedImageBlock>("ImageBlock")
  .select("originalName", "colors", "subjects", "resolution");

const { data: { payload } } = await imageQuery.exec();

console.log(payload);

©2024 Unbody