Skip to content

Commit eb9b851

Browse files
committed
feat[vector]: 법령 및 판례 데이터를 Qdrant 벡터 스토어에 로드
1 parent 1d91c65 commit eb9b851

File tree

1 file changed

+193
-0
lines changed
  • backend/src/main/java/com/ai/lawyer/global/qdrant/loader

1 file changed

+193
-0
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
package com.ai.lawyer.global.qdrant.loader;
2+
3+
import com.ai.lawyer.domain.law.entity.*;
4+
import com.ai.lawyer.domain.law.repository.*;
5+
import com.ai.lawyer.domain.precedent.repository.PrecedentRepository;
6+
import com.ai.lawyer.global.qdrant.entity.Qdrent;
7+
import com.ai.lawyer.global.qdrant.repository.QdrantRepository;
8+
import io.qdrant.client.QdrantClient;
9+
import io.qdrant.client.grpc.Collections;
10+
import jakarta.annotation.PostConstruct;
11+
import lombok.RequiredArgsConstructor;
12+
import lombok.extern.slf4j.Slf4j;
13+
import org.springframework.ai.document.Document;
14+
import org.springframework.ai.transformer.splitter.TextSplitter;
15+
import org.springframework.ai.vectorstore.VectorStore;
16+
import org.springframework.beans.factory.annotation.Value;
17+
import org.springframework.stereotype.Component;
18+
19+
import java.util.ArrayList;
20+
import java.util.HashMap;
21+
import java.util.List;
22+
import java.util.Map;
23+
import java.util.concurrent.ExecutionException;
24+
25+
@Slf4j
26+
@Component
27+
@RequiredArgsConstructor
28+
public class LawLoader {
29+
30+
private final PrecedentRepository precedentRepository;
31+
private final VectorStore vectorStore;
32+
private final QdrantClient qdrantClient;
33+
private final TextSplitter textSplitter;
34+
35+
private final LawRepository lawRepository;
36+
private final HangRepository hangRepository;
37+
private final JoRepository joRepository;
38+
private final JangRepository jangRepository;
39+
private final HoRepository hoRepository;
40+
private final QdrantRepository qdrantRepository;
41+
42+
43+
@Value("${spring.ai.vectorstore.qdrant.collection-name}")
44+
private String collectionName;
45+
46+
@Value("${spring.ai.vectorstore.qdrant.vector-size}")
47+
private Long vectorSize;
48+
49+
// 순서:
50+
@PostConstruct
51+
public void init() throws ExecutionException, InterruptedException {
52+
53+
existQdrantCollection();
54+
55+
Qdrent qdrent = qdrantRepository.findById(1L).orElse(
56+
Qdrent.builder().PointsCount(0L).build()
57+
);
58+
59+
if (!verification(qdrent.getPointsCount())) {
60+
return;
61+
}
62+
63+
//loadCasesIntoVectorStore(); 주석 풀기 금지 -> 과금
64+
//loadLawsIntoVectorStore(); 주석 풀기 금지 -> 과금
65+
66+
qdrent.setPointsCount(qdrantClient.getCollectionInfoAsync(collectionName).get().getPointsCount());
67+
68+
qdrantRepository.save(qdrent);
69+
}
70+
71+
public void loadCasesIntoVectorStore() {
72+
log.info("판례 데이터 벡터화를 시작합니다...");
73+
74+
List<Document> documents = precedentRepository.findAll().stream()
75+
.limit(5)
76+
.flatMap(lawEntity -> {
77+
Document originalDoc = new Document(
78+
lawEntity.getPrecedentContent(),
79+
Map.of("type", "판례", "caseNumber", lawEntity.getCaseNumber(), "court", lawEntity.getCourtName())
80+
);
81+
return textSplitter.apply(List.of(originalDoc)).stream();
82+
}).toList();
83+
84+
vectorStore.add(documents);
85+
log.info("판례 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다.", documents.size());
86+
}
87+
88+
public void loadLawsIntoVectorStore() {
89+
log.info("법령 데이터 벡터화를 시작합니다...");
90+
List<Document> allChunks = new ArrayList<>();
91+
92+
List<Law> laws = lawRepository.findAll();
93+
int lawCount = 0;
94+
for (Law law : laws) {
95+
if (lawCount++ >= 10) break;
96+
97+
List<Jang> jangs = jangRepository.findByLaw(law);
98+
int jangCount = 0;
99+
for (Jang jang : jangs) {
100+
if (jangCount++ >= 10) break;
101+
102+
List<Jo> jos = joRepository.findByJang(jang);
103+
int joCount = 0;
104+
for (Jo jo : jos) {
105+
if (joCount++ >= 10) break;
106+
107+
StringBuilder contentBuilder = new StringBuilder();
108+
109+
if (jo.getContent() != null && !jo.getContent().isBlank()) {
110+
contentBuilder.append(jo.getContent()).append("\n");
111+
}
112+
113+
List<Hang> hangs = hangRepository.findByJo(jo);
114+
int hangCount = 0;
115+
for (Hang hang : hangs) {
116+
if (hangCount++ >= 10) break;
117+
118+
if (hang.getContent() != null && !hang.getContent().isBlank()) {
119+
contentBuilder.append(hang.getContent()).append("\n");
120+
}
121+
122+
List<Ho> hos = hoRepository.findByHang(hang);
123+
int hoCount = 0;
124+
for (Ho ho : hos) {
125+
if (hoCount++ >= 10) break;
126+
127+
if (ho.getContent() != null && !ho.getContent().isBlank()) {
128+
contentBuilder.append(ho.getContent()).append("\n");
129+
}
130+
}
131+
}
132+
133+
Map<String, Object> metadata = new HashMap<>();
134+
metadata.put("type", "법령");
135+
metadata.put("lawName", law.getLawName());
136+
137+
Document originalDoc = new Document(contentBuilder.toString(), metadata);
138+
List<Document> chunks = textSplitter.apply(List.of(originalDoc));
139+
allChunks.addAll(chunks);
140+
}
141+
}
142+
}
143+
144+
vectorStore.add(allChunks);
145+
log.info("법령 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다.", allChunks.size());
146+
}
147+
148+
private void existQdrantCollection() throws InterruptedException, ExecutionException {
149+
// 현재 Qdrant에 있는 모든 컬렉션 목록을 가져옴
150+
var collections = qdrantClient.listCollectionsAsync().get();
151+
boolean collectionExists = collections.stream()
152+
.anyMatch(collection -> collection.equals(collectionName));
153+
154+
// 만약 컬렉션이 없다면, 새로 생성
155+
if (!collectionExists) {
156+
log.info("'{}' 컬렉션이 존재하지 않아 새로 생성중", collectionName);
157+
qdrantClient.createCollectionAsync(
158+
collectionName,
159+
Collections.VectorParams.newBuilder()
160+
.setSize(vectorSize) // yml에 설정된 벡터 크기
161+
.setDistance(Collections.Distance.Cosine) // 가장 일반적인 거리 측정 방식
162+
.build()
163+
).get();
164+
log.info("'{}' 컬렉션 생성을 완료했습니다.", collectionName);
165+
} else {
166+
log.info("'{}' 컬렉션이 이미 존재합니다.", collectionName);
167+
}
168+
}
169+
170+
private boolean verification(Long count) throws ExecutionException, InterruptedException {
171+
172+
if (lawRepository.count() == 0) {
173+
log.warn("데이터베이스에 법령 데이터가 없습니다. data.sql을 확인하세요.");
174+
return false;
175+
}
176+
177+
if (precedentRepository.count() == 0) {
178+
log.warn("데이터베이스에 판례 데이터가 없습니다. data.sql을 확인하세요.");
179+
return false;
180+
}
181+
182+
if (count == 0) {
183+
return true;
184+
}
185+
186+
if (qdrantClient.getCollectionInfoAsync(collectionName).get().getPointsCount() == count) {
187+
log.info("Qdrant 벡터 저장소에 이미 모든 데이터가 존재합니다.");
188+
return false;
189+
}
190+
191+
return true;
192+
}
193+
}

0 commit comments

Comments
 (0)