@@ -58,20 +58,6 @@ def get_index(self, **index_kwargs) -> "PineconeIndex":
5858 return index
5959
6060
61- class PineconeUploadStagerConfig (UploadStagerConfig ):
62- pass
63-
64-
65- class PineconeUploaderConfig (UploaderConfig ):
66- batch_size : Optional [int ] = Field (
67- default = None ,
68- description = "Optional number of records per batch. Will otherwise limit by size." ,
69- )
70- pool_threads : Optional [int ] = Field (
71- default = 1 , description = "Optional limit on number of threads to use for upload"
72- )
73-
74-
7561ALLOWED_FIELDS = (
7662 "element_id" ,
7763 "text" ,
@@ -86,31 +72,56 @@ class PineconeUploaderConfig(UploaderConfig):
8672 "is_continuation" ,
8773 "link_urls" ,
8874 "link_texts" ,
75+ "text_as_html" ,
8976)
9077
9178
class PineconeUploadStagerConfig(UploadStagerConfig):
    # Stager settings: selects which element/metadata keys are carried into
    # the "metadata" payload of each staged Pinecone record.
    #
    # The default must be a real list of field names. Passing
    # str(ALLOWED_FIELDS) would yield one long string, and the stager's
    # `key in metadata_fields` check would silently turn into a substring
    # test (e.g. "id" would match inside "element_id"). A factory is used so
    # every config instance gets its own list.
    metadata_fields: list[str] = Field(
        default_factory=lambda: list(ALLOWED_FIELDS),
        description=(
            "which metadata from the source element to map to the payload metadata being sent to "
            "Pinecone."
        ),
    )
87+
88+
class PineconeUploaderConfig(UploaderConfig):
    # Tuning options for the Pinecone uploader. Both fields are optional.

    # Records per upload batch. Left as None by default; per the field
    # description, batches are then limited by size instead of by count.
    batch_size: Optional[int] = Field(
        default=None,
        description="Optional number of records per batch. Will otherwise limit by size.",
    )

    # Cap on the number of threads used for upload; defaults to one.
    pool_threads: Optional[int] = Field(
        default=1,
        description="Optional limit on number of threads to use for upload",
    )
97+
98+
9299@dataclass
93100class PineconeUploadStager (UploadStager ):
94101 upload_stager_config : PineconeUploadStagerConfig = field (
95102 default_factory = lambda : PineconeUploadStagerConfig ()
96103 )
97104
98- @staticmethod
99- def conform_dict (element_dict : dict ) -> dict :
105+ def conform_dict (self , element_dict : dict ) -> dict :
100106 embeddings = element_dict .pop ("embeddings" , None )
101107 metadata : dict [str , Any ] = element_dict .pop ("metadata" , {})
102108 data_source = metadata .pop ("data_source" , {})
103109 coordinates = metadata .pop ("coordinates" , {})
104-
105- element_dict .update (metadata )
106- element_dict .update (data_source )
107- element_dict .update (coordinates )
110+ pinecone_metadata = {}
111+ for possible_meta in [element_dict , metadata , data_source , coordinates ]:
112+ pinecone_metadata .update (
113+ {
114+ k : v
115+ for k , v in possible_meta .items ()
116+ if k in self .upload_stager_config .metadata_fields
117+ }
118+ )
108119
109120 return {
110121 "id" : str (uuid .uuid4 ()),
111122 "values" : embeddings ,
112123 "metadata" : flatten_dict (
113- { k : v for k , v in element_dict . items () if k in ALLOWED_FIELDS } ,
124+ pinecone_metadata ,
114125 separator = "-" ,
115126 flatten_lists = True ,
116127 remove_none = True ,
0 commit comments