1+ package com .ai .lawyer .global .qdrant .loader ;
2+
3+ import com .ai .lawyer .domain .law .entity .*;
4+ import com .ai .lawyer .domain .law .repository .*;
5+ import com .ai .lawyer .domain .precedent .repository .PrecedentRepository ;
6+ import com .ai .lawyer .global .qdrant .entity .Qdrent ;
7+ import com .ai .lawyer .global .qdrant .repository .QdrantRepository ;
8+ import io .qdrant .client .QdrantClient ;
9+ import io .qdrant .client .grpc .Collections ;
10+ import jakarta .annotation .PostConstruct ;
11+ import lombok .RequiredArgsConstructor ;
12+ import lombok .extern .slf4j .Slf4j ;
13+ import org .springframework .ai .document .Document ;
14+ import org .springframework .ai .transformer .splitter .TextSplitter ;
15+ import org .springframework .ai .vectorstore .VectorStore ;
16+ import org .springframework .beans .factory .annotation .Value ;
17+ import org .springframework .stereotype .Component ;
18+
19+ import java .util .ArrayList ;
20+ import java .util .HashMap ;
21+ import java .util .List ;
22+ import java .util .Map ;
23+ import java .util .concurrent .ExecutionException ;
24+
25+ @ Slf4j
26+ @ Component
27+ @ RequiredArgsConstructor
28+ public class LawLoader {
29+
30+ private final PrecedentRepository precedentRepository ;
31+ private final VectorStore vectorStore ;
32+ private final QdrantClient qdrantClient ;
33+ private final TextSplitter textSplitter ;
34+
35+ private final LawRepository lawRepository ;
36+ private final HangRepository hangRepository ;
37+ private final JoRepository joRepository ;
38+ private final JangRepository jangRepository ;
39+ private final HoRepository hoRepository ;
40+ private final QdrantRepository qdrantRepository ;
41+
42+
43+ @ Value ("${spring.ai.vectorstore.qdrant.collection-name}" )
44+ private String collectionName ;
45+
46+ @ Value ("${spring.ai.vectorstore.qdrant.vector-size}" )
47+ private Long vectorSize ;
48+
49+ // 순서:
50+ @ PostConstruct
51+ public void init () throws ExecutionException , InterruptedException {
52+
53+ existQdrantCollection ();
54+
55+ Qdrent qdrent = qdrantRepository .findById (1L ).orElse (
56+ Qdrent .builder ().PointsCount (0L ).build ()
57+ );
58+
59+ if (!verification (qdrent .getPointsCount ())) {
60+ return ;
61+ }
62+
63+ //loadCasesIntoVectorStore(); 주석 풀기 금지 -> 과금
64+ //loadLawsIntoVectorStore(); 주석 풀기 금지 -> 과금
65+
66+ qdrent .setPointsCount (qdrantClient .getCollectionInfoAsync (collectionName ).get ().getPointsCount ());
67+
68+ qdrantRepository .save (qdrent );
69+ }
70+
71+ public void loadCasesIntoVectorStore () {
72+ log .info ("판례 데이터 벡터화를 시작합니다..." );
73+
74+ List <Document > documents = precedentRepository .findAll ().stream ()
75+ .limit (5 )
76+ .flatMap (lawEntity -> {
77+ Document originalDoc = new Document (
78+ lawEntity .getPrecedentContent (),
79+ Map .of ("type" , "판례" , "caseNumber" , lawEntity .getCaseNumber (), "court" , lawEntity .getCourtName ())
80+ );
81+ return textSplitter .apply (List .of (originalDoc )).stream ();
82+ }).toList ();
83+
84+ vectorStore .add (documents );
85+ log .info ("판례 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다." , documents .size ());
86+ }
87+
88+ public void loadLawsIntoVectorStore () {
89+ log .info ("법령 데이터 벡터화를 시작합니다..." );
90+ List <Document > allChunks = new ArrayList <>();
91+
92+ List <Law > laws = lawRepository .findAll ();
93+ int lawCount = 0 ;
94+ for (Law law : laws ) {
95+ if (lawCount ++ >= 10 ) break ;
96+
97+ List <Jang > jangs = jangRepository .findByLaw (law );
98+ int jangCount = 0 ;
99+ for (Jang jang : jangs ) {
100+ if (jangCount ++ >= 10 ) break ;
101+
102+ List <Jo > jos = joRepository .findByJang (jang );
103+ int joCount = 0 ;
104+ for (Jo jo : jos ) {
105+ if (joCount ++ >= 10 ) break ;
106+
107+ StringBuilder contentBuilder = new StringBuilder ();
108+
109+ if (jo .getContent () != null && !jo .getContent ().isBlank ()) {
110+ contentBuilder .append (jo .getContent ()).append ("\n " );
111+ }
112+
113+ List <Hang > hangs = hangRepository .findByJo (jo );
114+ int hangCount = 0 ;
115+ for (Hang hang : hangs ) {
116+ if (hangCount ++ >= 10 ) break ;
117+
118+ if (hang .getContent () != null && !hang .getContent ().isBlank ()) {
119+ contentBuilder .append (hang .getContent ()).append ("\n " );
120+ }
121+
122+ List <Ho > hos = hoRepository .findByHang (hang );
123+ int hoCount = 0 ;
124+ for (Ho ho : hos ) {
125+ if (hoCount ++ >= 10 ) break ;
126+
127+ if (ho .getContent () != null && !ho .getContent ().isBlank ()) {
128+ contentBuilder .append (ho .getContent ()).append ("\n " );
129+ }
130+ }
131+ }
132+
133+ Map <String , Object > metadata = new HashMap <>();
134+ metadata .put ("type" , "법령" );
135+ metadata .put ("lawName" , law .getLawName ());
136+
137+ Document originalDoc = new Document (contentBuilder .toString (), metadata );
138+ List <Document > chunks = textSplitter .apply (List .of (originalDoc ));
139+ allChunks .addAll (chunks );
140+ }
141+ }
142+ }
143+
144+ vectorStore .add (allChunks );
145+ log .info ("법령 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다." , allChunks .size ());
146+ }
147+
148+ private void existQdrantCollection () throws InterruptedException , ExecutionException {
149+ // 현재 Qdrant에 있는 모든 컬렉션 목록을 가져옴
150+ var collections = qdrantClient .listCollectionsAsync ().get ();
151+ boolean collectionExists = collections .stream ()
152+ .anyMatch (collection -> collection .equals (collectionName ));
153+
154+ // 만약 컬렉션이 없다면, 새로 생성
155+ if (!collectionExists ) {
156+ log .info ("'{}' 컬렉션이 존재하지 않아 새로 생성중" , collectionName );
157+ qdrantClient .createCollectionAsync (
158+ collectionName ,
159+ Collections .VectorParams .newBuilder ()
160+ .setSize (vectorSize ) // yml에 설정된 벡터 크기
161+ .setDistance (Collections .Distance .Cosine ) // 가장 일반적인 거리 측정 방식
162+ .build ()
163+ ).get ();
164+ log .info ("'{}' 컬렉션 생성을 완료했습니다." , collectionName );
165+ } else {
166+ log .info ("'{}' 컬렉션이 이미 존재합니다." , collectionName );
167+ }
168+ }
169+
170+ private boolean verification (Long count ) throws ExecutionException , InterruptedException {
171+
172+ if (lawRepository .count () == 0 ) {
173+ log .warn ("데이터베이스에 법령 데이터가 없습니다. data.sql을 확인하세요." );
174+ return false ;
175+ }
176+
177+ if (precedentRepository .count () == 0 ) {
178+ log .warn ("데이터베이스에 판례 데이터가 없습니다. data.sql을 확인하세요." );
179+ return false ;
180+ }
181+
182+ if (count == 0 ) {
183+ return true ;
184+ }
185+
186+ if (qdrantClient .getCollectionInfoAsync (collectionName ).get ().getPointsCount () == count ) {
187+ log .info ("Qdrant 벡터 저장소에 이미 모든 데이터가 존재합니다." );
188+ return false ;
189+ }
190+
191+ return true ;
192+ }
193+ }
0 commit comments