diff --git a/docs/docs/examples/examples/product_recommendation.md b/docs/docs/examples/examples/product_recommendation.md index 239e8b5e..6500670a 100644 --- a/docs/docs/examples/examples/product_recommendation.md +++ b/docs/docs/examples/examples/product_recommendation.md @@ -29,9 +29,9 @@ Product taxonomy is a way to organize product catalogs in a logical and hierarch ## Prerequisites -* [Install PostgreSQL](https://cocoindex.io/docs/getting_started/installation#-install-postgres). CocoIndex uses PostgreSQL internally for incremental processing. +- [Install PostgreSQL](https://cocoindex.io/docs/getting_started/installation#-install-postgres). CocoIndex uses PostgreSQL internally for incremental processing. - [Install Neo4j](https://cocoindex.io/docs/targets/neo4j), a graph database. -- - [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai). Create a `.env` file from `.env.example`, and fill `OPENAI_API_KEY`. +- [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai). Create a `.env` file from `.env.example`, and fill `OPENAI_API_KEY`. Alternatively, we have native support for Gemini, Ollama, LiteLLM. You can choose your favorite LLM provider and work completely on-premises. @@ -134,44 +134,43 @@ It performs the following transformations: ### Product Taxonomy Definition -Since we are using LLM to extract product taxonomy, we need to provide a detailed instruction at the class-level docstring. +Since we are using LLM to extract product taxonomy, we need to provide a detailed instruction at the field-level description. ```python -@dataclasses.dataclass -class ProductTaxonomy: - """ - Taxonomy for the product. - - A taxonomy is a concise noun (or short noun phrase), based on its core functionality, without specific details such as branding, style, etc. - - Always use the most common words in US English. - - Use lowercase without punctuation, unless it's a proper noun or acronym. - - A product may have multiple taxonomies. 
Avoid large categories like "office supplies" or "electronics". Use specific ones, like "pen" or "printer". - """ - name: str +class ProductTaxonomy(BaseModel): + """ + Taxonomy for the product. + """ + + name: str = Field( + description="A taxonomy is a concise noun (or short noun phrase), based on its core functionality, " + "without specific details such as branding, style, etc. Always use the most common words in US " + "English. Use lowercase without punctuation, unless it's a proper noun or acronym. A product may " + "have multiple taxonomies. Avoid large categories like 'office supplies' or 'electronics'. Use " + "specific ones, like 'pen' or 'printer'." + ) ``` ### Define Product Taxonomy Info -Basically we want to extract all possible taxonomies for a product, and think about what other products are likely to be bought together with the current product. +Basically, we want to extract all possible taxonomies for a product and think about what other products are likely to be bought together with the current product. ```python -@dataclasses.dataclass -class ProductTaxonomyInfo: - """ - Taxonomy information for the product. - - Fields: - - taxonomies: Taxonomies for the current product. - - complementary_taxonomies: Think about when customers buy this product, what else they might need as complementary products. Put labels for these complentary products. - """ - taxonomies: list[ProductTaxonomy] - complementary_taxonomies: list[ProductTaxonomy] +class ProductTaxonomyInfo(BaseModel): + """ + Taxonomy information for the product. + """ + + taxonomies: list[ProductTaxonomy] = Field( + description="Taxonomies for the current product." + ) + complementary_taxonomies: list[ProductTaxonomy] = Field( + description="Think about when customers buy this product, what else they might need as complementary products. Put labels " + "for these complementary products." 
+ ) ``` -For each product, we want some insight about its taxonomy and complementary taxonomy and we could use that as bridge to find related product using knowledge graph. +For each product, we want some insight about its taxonomy and complementary taxonomy, and we could use that as a bridge to find a related product using the knowledge graph. ### LLM Extraction diff --git a/examples/product_recommendation/main.py b/examples/product_recommendation/main.py index b63cf143..140672ca 100644 --- a/examples/product_recommendation/main.py +++ b/examples/product_recommendation/main.py @@ -2,7 +2,7 @@ This example shows how to extract relationships from Markdown documents and build a knowledge graph. """ -import dataclasses +from pydantic import BaseModel, Field import datetime import cocoindex from jinja2 import Template @@ -21,7 +21,6 @@ GraphDbDeclaration = cocoindex.targets.Neo4jDeclaration conn_spec = neo4j_conn_spec - # Template for rendering product information as markdown to provide information to LLMs PRODUCT_TEMPLATE = """ # {{ title }} @@ -42,48 +41,45 @@ """ -@dataclasses.dataclass -class ProductInfo: +class ProductInfo(BaseModel): id: str title: str price: float detail: str -@dataclasses.dataclass -class ProductTaxonomy: +class ProductTaxonomy(BaseModel): """ Taxonomy for the product. - - A taxonomy is a concise noun (or short noun phrase), based on its core functionality, without specific details such as branding, style, etc. - - Always use the most common words in US English. - - Use lowercase without punctuation, unless it's a proper noun or acronym. - - A product may have multiple taxonomies. Avoid large categories like "office supplies" or "electronics". Use specific ones, like "pen" or "printer". """ - name: str + name: str = Field( + description="A taxonomy is a concise noun (or short noun phrase), based on its core functionality, " + "without specific details such as branding, style, etc. Always use the most common words in US " + "English. 
Use lowercase without punctuation, unless it's a proper noun or acronym. A product may " + "have multiple taxonomies. Avoid large categories like 'office supplies' or 'electronics'. Use " + "specific ones, like 'pen' or 'printer'." + ) -@dataclasses.dataclass -class ProductTaxonomyInfo: +class ProductTaxonomyInfo(BaseModel): """ Taxonomy information for the product. - - Fields: - - taxonomies: Taxonomies for the current product. - - complementary_taxonomies: Think about when customers buy this product, what else they might need as complementary products. Put labels for these complentary products. """ - taxonomies: list[ProductTaxonomy] - complementary_taxonomies: list[ProductTaxonomy] + taxonomies: list[ProductTaxonomy] = Field( + description="Taxonomies for the current product." + ) + complementary_taxonomies: list[ProductTaxonomy] = Field( + description="Think about when customers buy this product, what else they might need as complementary products. Put labels " + "for these complementary products." + ) @cocoindex.op.function(behavior_version=2) def extract_product_info(product: cocoindex.Json, filename: str) -> ProductInfo: - # Print markdown for LLM to extract the taxonomy and complimentary taxonomy + """Print markdown for LLM to extract the taxonomy and complementary taxonomy.""" + return ProductInfo( id=f"{filename.removesuffix('.json')}", title=product["title"], @@ -97,7 +93,7 @@ def store_product_flow( flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope ) -> None: """ - Define an example flow that extracts triples from files and build knowledge graph. + Define an example flow that extracts triples from files and builds the knowledge graph. 
""" data_scope["products"] = flow_builder.add_source( cocoindex.sources.LocalFile(path="products", included_patterns=["*.json"]), diff --git a/examples/product_recommendation/pyproject.toml b/examples/product_recommendation/pyproject.toml index f8d7b208..e7fce06a 100644 --- a/examples/product_recommendation/pyproject.toml +++ b/examples/product_recommendation/pyproject.toml @@ -3,7 +3,7 @@ name = "cocoindex-ecommerce-taxonomy" version = "0.1.0" description = "Simple example for CocoIndex: extract taxonomy from e-commerce products and build knowledge graph." requires-python = ">=3.11" -dependencies = ["cocoindex>=0.3.9", "jinja2>=3.1.6"] +dependencies = ["cocoindex>=0.3.9", "jinja2>=3.1.6", "pydantic>=2.11.9"] [tool.setuptools] packages = []