diff --git a/backend/build.gradle b/backend/build.gradle index 7baf1b3f..38910f75 100644 --- a/backend/build.gradle +++ b/backend/build.gradle @@ -40,6 +40,7 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-actuator' implementation 'org.springframework.boot:spring-boot-starter-oauth2-client' implementation group: 'org.springframework.boot', name: 'spring-boot-starter-mail', version: '3.0.5' + implementation 'org.springframework.boot:spring-boot-starter-batch' // API Documentation (문서화) implementation 'org.apache.commons:commons-lang3:3.18.0' @@ -78,6 +79,8 @@ dependencies { implementation 'org.springframework.ai:spring-ai-starter-model-openai' implementation 'org.springframework.ai:spring-ai-advisors-vector-store' implementation 'org.springframework.ai:spring-ai-starter-model-chat-memory-repository-jdbc' + implementation 'org.springframework.ai:spring-ai-starter-model-ollama' + implementation 'org.springframework.ai:spring-ai-starter-model-huggingface' // Testing (테스트) testImplementation 'org.springframework.boot:spring-boot-starter-test' diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 79c684f6..6726a097 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -58,7 +58,28 @@ services: timeout: 5s retries: 10 + ollama: + image: ollama/ollama:latest + container_name: ollama + restart: unless-stopped + ports: + - "11434:11434" + volumes: + - ollama-data:/root/.ollama + entrypoint: [ "/bin/sh", "-c" ] + command: > + "ollama serve & + sleep 5 && + ollama pull daynice/kure-v1:567m && + wait" + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:11434/api/version" ] + interval: 10s + timeout: 5s + retries: 10 + volumes: mysql-data: redis-data: - qdrant-data: \ No newline at end of file + qdrant-data: + ollama-data: \ No newline at end of file diff --git a/backend/src/main/java/com/ai/lawyer/domain/chatbot/dto/ExtractionDto.java 
b/backend/src/main/java/com/ai/lawyer/domain/chatbot/dto/ExtractionDto.java index e1ddbf82..e5e6b5d6 100644 --- a/backend/src/main/java/com/ai/lawyer/domain/chatbot/dto/ExtractionDto.java +++ b/backend/src/main/java/com/ai/lawyer/domain/chatbot/dto/ExtractionDto.java @@ -4,8 +4,6 @@ import lombok.Data; import lombok.NoArgsConstructor; -import java.util.List; - public class ExtractionDto { @Data @@ -19,7 +17,7 @@ public static class TitleExtractionDto { @AllArgsConstructor @NoArgsConstructor public static class KeywordExtractionDto { - private List keyword; + private String keyword; } } diff --git a/backend/src/main/java/com/ai/lawyer/domain/chatbot/service/ChatBotService.java b/backend/src/main/java/com/ai/lawyer/domain/chatbot/service/ChatBotService.java index 7c54719b..4eed34d9 100644 --- a/backend/src/main/java/com/ai/lawyer/domain/chatbot/service/ChatBotService.java +++ b/backend/src/main/java/com/ai/lawyer/domain/chatbot/service/ChatBotService.java @@ -57,17 +57,13 @@ public class ChatBotService { // 멤버 조회 -> 벡터 검색 (판례, 법령) -> 프롬프트 생성 (시스템, 유저) -> 채팅 클라이언트 호출 (스트림) -> 응답 저장, 제목/키워드 추출 public Flux sendMessage(Long memberId, ChatRequest chatChatRequestDto, Long roomId) { - if(memberId == null) { - log.error("해당 멤버는 존재하지 않거나, accessToken이 만료되거나 잘못되었습니다."); - } - Member member = memberRepository.findById(memberId) .orElseThrow(() -> new IllegalArgumentException("존재하지 않는 회원입니다.") ); // 벡터 검색 (판례, 법령) - List similarCaseDocuments = qdrantService.searchDocument(chatChatRequestDto.getMessage(), "type", "판례", 3); - List similarLawDocuments = qdrantService.searchDocument(chatChatRequestDto.getMessage(), "type", "법령", 2); + List similarCaseDocuments = qdrantService.searchDocument(chatChatRequestDto.getMessage(), "type", "판례"); + List similarLawDocuments = qdrantService.searchDocument(chatChatRequestDto.getMessage(), "type", "법령"); // 판례와 법령 정보를 구분 있게 포맷팅 String caseContext = formatting(similarCaseDocuments); @@ -167,18 +163,19 @@ private void handlerTasks(ChatRequest 
chatDto, History history, String fullRespo private void extractAndUpdateKeywordRanks(String message) { KeywordExtractionDto keywordResponse = keywordExtract(message, keywordExtraction, KeywordExtractionDto.class); - for (String keyword : keywordResponse.getKeyword()) { - KeywordRank keywordRank = keywordRankRepository.findByKeyword(keyword); - if (keywordRank == null) { - keywordRank = KeywordRank.builder() - .keyword(keyword) - .score(1L) - .build(); - } else { - keywordRank.setScore(keywordRank.getScore() + 1); - } - keywordRankRepository.save(keywordRank); + KeywordRank keywordRank = keywordRankRepository.findByKeyword(keywordResponse.getKeyword()); + + if (keywordRank == null) { + keywordRank = KeywordRank.builder() + .keyword(keywordResponse.getKeyword()) + .score(1L) + .build(); + } else { + keywordRank.setScore(keywordRank.getScore() + 1); } + + keywordRankRepository.save(keywordRank); + } private void setHistoryTitle(ChatRequest chatDto, History history, String fullResponse) { diff --git a/backend/src/main/java/com/ai/lawyer/global/batch/BatchScheduler.java b/backend/src/main/java/com/ai/lawyer/global/batch/BatchScheduler.java new file mode 100644 index 00000000..e95b6242 --- /dev/null +++ b/backend/src/main/java/com/ai/lawyer/global/batch/BatchScheduler.java @@ -0,0 +1,25 @@ +package com.ai.lawyer.global.batch; + +/*@Slf4j +@Component +@EnableScheduling +@RequiredArgsConstructor +public class BatchScheduler { + + private final JobLauncher jobLauncher; + private final Job dataVectorizationJob; + + @Scheduled(cron = "#{${batch.scheduler.run-every-minute} ? 
'* * * * * *' : '* * 2 * * *'}") + public void runVectorizationJob() { + log.info("전체 데이터(판례, 법령) 벡터화 스케줄러 실행..."); + try { + JobParameters jobParameters = new JobParametersBuilder() + .addString("requestDate", LocalDateTime.now().toString()) + .toJobParameters(); + + jobLauncher.run(dataVectorizationJob, jobParameters); // Job 실행 + } catch (Exception e) { + log.error("전체 데이터 벡터화 배치 작업 실행 중 오류 발생", e); + } + } +}*/ diff --git a/backend/src/main/java/com/ai/lawyer/global/batch/DataVectorizationJobConfig.java b/backend/src/main/java/com/ai/lawyer/global/batch/DataVectorizationJobConfig.java new file mode 100644 index 00000000..ac69d2ce --- /dev/null +++ b/backend/src/main/java/com/ai/lawyer/global/batch/DataVectorizationJobConfig.java @@ -0,0 +1,208 @@ +package com.ai.lawyer.global.batch; + +/*@Slf4j +@Configuration +@RequiredArgsConstructor +public class DataVectorizationJobConfig { + + private final JobRepository jobRepository; + private final PlatformTransactionManager transactionManager; + private final EntityManagerFactory entityManagerFactory; + private final VectorStore vectorStore; + + private final JangRepository jangRepository; + private final JoRepository joRepository; + private final HangRepository hangRepository; + private final HoRepository hoRepository; + + private final TokenTextSplitter tokenSplitter = TokenTextSplitter.builder() + .withChunkSize(800) + .withMinChunkSizeChars(0) + .withMinChunkLengthToEmbed(5) + .withMaxNumChunks(10000) + .withKeepSeparator(true) + .build(); + + private static final int CHUNK_SIZE = 10; // 배치 처리 시 한 번에 읽어올 데이터 수 + + @Value("${batch.page.size.precedent}") + private int precedentPageSize; // 하루에 처리할 판례 수 + + @Value("${batch.page.size.law}") + private int lawPageSize; // 하루에 처리할 법령 수 + + @Bean + public TaskExecutor taskExecutor() { + ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); + executor.setCorePoolSize(10); + executor.setMaxPoolSize(20); + executor.setQueueCapacity(100); + 
executor.setThreadNamePrefix("async-thread-"); + executor.initialize(); + return executor; + } + + // -------------- 전체 데이터 벡터화 정의 -------------- + @Bean + public Job dataVectorizationJob() { + return new JobBuilder("dataVectorizationJob", jobRepository) + .start(precedentVectorizationStep()) // 판례 벡터화 Step 실행 + .next(lawVectorizationStep()) // 법령 벡터화 Step 실행 + .build(); + } + + // -------------- 판례 벡터화 --------------- + @Bean + public Step precedentVectorizationStep() { + log.info(">>>>>> 판례 벡터화 시작"); + return new StepBuilder("precedentVectorizationStep", jobRepository) + .>chunk(CHUNK_SIZE, transactionManager) + .reader(precedentItemReader()) + .processor(precedentItemProcessor()) + .writer(documentItemWriter()) + .taskExecutor(taskExecutor()) + .build(); + } + + @Bean + public JpaPagingItemReader precedentItemReader() { + return new JpaPagingItemReaderBuilder() + .name("precedentItemReader") + .entityManagerFactory(entityManagerFactory) + .pageSize(CHUNK_SIZE) + .maxItemCount(precedentPageSize) + .queryString("SELECT p FROM Precedent p ORDER BY p.id ASC") + .build(); + } + + @Bean + public ItemProcessor> precedentItemProcessor() { + + return precedent -> { + String content = precedent.getPrecedentContent(); + if (content == null || content.isBlank()) return null; + + Document originalDoc = new Document(content, Map.of( + "type", "판례", + "caseNumber", precedent.getCaseNumber(), + "court", precedent.getCourtName(), + "caseName", precedent.getCaseName() + )); + + List chunkDocs = tokenSplitter.split(originalDoc); + List finalChunks = new ArrayList<>(); + + // 청크별로 메타데이터에 인덱스 추가 -> 구분 용도 + for (int i = 0; i < chunkDocs.size(); i++) { + Document chunk = chunkDocs.get(i); + Map newMetadata = new HashMap<>(chunk.getMetadata()); + newMetadata.put("chunkIndex", i); + finalChunks.add(new Document(chunk.getText(), newMetadata)); + } + return finalChunks; + }; + } + + // -------------- 법령 백터화 --------------- + @Bean + public Step lawVectorizationStep() { + log.info(">>>>>> 
법령 벡터화 시작"); + return new StepBuilder("lawVectorizationStep", jobRepository) + .>chunk(CHUNK_SIZE, transactionManager) // 법령은 한 번에 10개씩 처리 + .reader(lawItemReader()) + .processor(lawItemProcessor()) + .writer(documentItemWriter()) + .taskExecutor(taskExecutor()) + .build(); + } + + @Bean + public JpaPagingItemReader lawItemReader() { + return new JpaPagingItemReaderBuilder() + .name("lawItemReader") + .entityManagerFactory(entityManagerFactory) + .pageSize(CHUNK_SIZE) + .maxItemCount(lawPageSize) + .queryString("SELECT l FROM Law l ORDER BY l.id ASC") + .build(); + } + + @Bean + public ItemProcessor> lawItemProcessor() { + return law -> { + List finalChunks = new ArrayList<>(); + + List jangs = jangRepository.findByLaw(law); + + for (Jang jang : jangs) { + + StringBuilder contentBuilder = new StringBuilder(); + + contentBuilder.append(law.getLawName()).append("\n"); + + if (jang.getContent() != null && !jang.getContent().isBlank()) { + contentBuilder.append(jang.getContent()).append("\n"); + } + + List jos = joRepository.findByJang(jang); + for (Jo jo : jos) { + + if (jo.getContent() != null && !jo.getContent().isBlank()) { + contentBuilder.append(jo.getContent()).append("\n"); + } + + List hangs = hangRepository.findByJo(jo); + for (Hang hang : hangs) { + if (hang.getContent() != null && !hang.getContent().isBlank()) { + contentBuilder.append(hang.getContent()).append("\n"); + } + + List hos = hoRepository.findByHang(hang); + for (Ho ho : hos) { + if (ho.getContent() != null && !ho.getContent().isBlank()) { + contentBuilder.append(ho.getContent()).append("\n"); + } + } + } + } + + // === Jang 단위로 문서화 === + String finalContent = contentBuilder.toString(); + + if (!finalContent.isBlank()) { + Map metadata = new HashMap<>(); + metadata.put("type", "법령"); + metadata.put("lawName", law.getLawName()); + metadata.put("jangId", jang.getId()); + + Document originalDoc = new Document(finalContent, metadata); + + List chunkDocs = tokenSplitter.split(originalDoc); + + for 
(int i = 0; i < chunkDocs.size(); i++) { + Document chunk = chunkDocs.get(i); + Map newMetadata = new HashMap<>(chunk.getMetadata()); + newMetadata.put("chunkIndex", i); + finalChunks.add(new Document(chunk.getText(), newMetadata)); + } + } + } + + return finalChunks.isEmpty() ? null : finalChunks; + }; + } + + @Bean + public ItemWriter> documentItemWriter() { + return chunk -> { + List totalDocuments = chunk.getItems().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); + + if (!totalDocuments.isEmpty()) { + vectorStore.add(totalDocuments); + log.info(">>>>>> {}개의 Document 청크를 벡터 저장소에 저장했습니다.", totalDocuments.size()); + } + }; + } +}*/ diff --git a/backend/src/main/java/com/ai/lawyer/global/config/AIConfig.java b/backend/src/main/java/com/ai/lawyer/global/config/AIConfig.java index 035ffba5..914d9553 100644 --- a/backend/src/main/java/com/ai/lawyer/global/config/AIConfig.java +++ b/backend/src/main/java/com/ai/lawyer/global/config/AIConfig.java @@ -3,16 +3,24 @@ import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.memory.ChatMemoryRepository; import org.springframework.ai.chat.memory.repository.jdbc.JdbcChatMemoryRepository; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.ollama.OllamaEmbeddingModel; import org.springframework.ai.openai.OpenAiChatModel; -import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.transaction.PlatformTransactionManager; @Configuration public class AIConfig { + @Bean + @Primary + public EmbeddingModel primaryOllamaEmbeddingModel(OllamaEmbeddingModel ollamaEmbeddingModel) { + return ollamaEmbeddingModel; + } + @Bean public ChatMemoryRepository chatMemoryRepository(JdbcTemplate jdbcTemplate, 
PlatformTransactionManager transactionManager) { return JdbcChatMemoryRepository.builder() @@ -26,9 +34,4 @@ public ChatClient openAiChatClient(OpenAiChatModel openAiChatModel) { return ChatClient.create(openAiChatModel); } - @Bean - public TokenTextSplitter tokenTextSplitter() { - return new TokenTextSplitter(500, 150, 5, 10000, true); - } - } diff --git a/backend/src/main/java/com/ai/lawyer/global/config/DataDBConfig.java b/backend/src/main/java/com/ai/lawyer/global/config/DataDBConfig.java new file mode 100644 index 00000000..2316e031 --- /dev/null +++ b/backend/src/main/java/com/ai/lawyer/global/config/DataDBConfig.java @@ -0,0 +1,79 @@ +package com.ai.lawyer.global.config; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.jdbc.DataSourceBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; +import org.springframework.orm.jpa.JpaTransactionManager; +import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean; +import org.springframework.orm.jpa.vendor.HibernateJpaVendorAdapter; +import org.springframework.transaction.PlatformTransactionManager; + +import javax.sql.DataSource; +import java.util.HashMap; + +@Configuration +@EnableJpaRepositories( + basePackages = "com.ai.lawyer.domain.*", + entityManagerFactoryRef = "dataEntityManager", + transactionManagerRef = "dataTransactionManager" +) +public class DataDBConfig { + + @Value("${spring.datasource.url}") + private String url; + @Value("${spring.datasource.username}") + private String username; + @Value("${spring.datasource.password}") + private String password; + @Value("${spring.datasource.driver-class-name}") + private String driver; + + @Bean + @Primary + public DataSource dataDBSource() { + return DataSourceBuilder.create() + .url(url) + 
.username(username) + .password(password) + .driverClassName(driver) + .build(); + } + + @Bean + @Primary + public LocalContainerEntityManagerFactoryBean dataEntityManager() { + + LocalContainerEntityManagerFactoryBean em = new LocalContainerEntityManagerFactoryBean(); + + em.setDataSource(dataDBSource()); + em.setPackagesToScan(new String[]{"com.ai.lawyer.domain.*"}); + em.setJpaVendorAdapter(new HibernateJpaVendorAdapter()); + + HashMap properties = new HashMap<>(); + properties.put("hibernate.hbm2ddl.auto", "update"); + properties.put("hibernate.show_sql", "true"); + properties.put( + "hibernate.physical_naming_strategy", + "org.hibernate.boot.model.naming.CamelCaseToUnderscoresNamingStrategy" + ); + + em.setJpaPropertyMap(properties); + + return em; + } + + @Bean + @Primary + public PlatformTransactionManager dataTransactionManager() { + + JpaTransactionManager transactionManager = new JpaTransactionManager(); + + transactionManager.setEntityManagerFactory(dataEntityManager().getObject()); + + return transactionManager; + } + +} \ No newline at end of file diff --git a/backend/src/main/java/com/ai/lawyer/global/config/MetaDBConfig.java b/backend/src/main/java/com/ai/lawyer/global/config/MetaDBConfig.java new file mode 100644 index 00000000..bae40ebc --- /dev/null +++ b/backend/src/main/java/com/ai/lawyer/global/config/MetaDBConfig.java @@ -0,0 +1,21 @@ +package com.ai.lawyer.global.config; + +import org.springframework.boot.autoconfigure.batch.BatchDataSource; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.jdbc.DataSourceBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import javax.sql.DataSource; + +@Configuration +public class MetaDBConfig { + + @Bean + @BatchDataSource + @ConfigurationProperties(prefix = "spring.datasource-meta") + public DataSource metaDBSource() { + return DataSourceBuilder.create().build(); + } + +} 
\ No newline at end of file diff --git a/backend/src/main/java/com/ai/lawyer/global/exception/ErrorResponse.java b/backend/src/main/java/com/ai/lawyer/global/exception/ErrorResponse.java deleted file mode 100644 index 705f04f1..00000000 --- a/backend/src/main/java/com/ai/lawyer/global/exception/ErrorResponse.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.ai.lawyer.global.exception; - -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; - -@Data -@AllArgsConstructor -@NoArgsConstructor -public class ErrorResponse { - - private String code; - - private String message; - -} diff --git a/backend/src/main/java/com/ai/lawyer/global/exception/GlobalExceptionHandler.java b/backend/src/main/java/com/ai/lawyer/global/exception/GlobalExceptionHandler.java deleted file mode 100644 index b36d88a8..00000000 --- a/backend/src/main/java/com/ai/lawyer/global/exception/GlobalExceptionHandler.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.ai.lawyer.global.exception; - -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.ExceptionHandler; -import org.springframework.web.bind.annotation.RestControllerAdvice; - -@RestControllerAdvice -public class GlobalExceptionHandler { - - @ExceptionHandler(IllegalArgumentException.class) - public ResponseEntity handleIllegalArgument(IllegalArgumentException e) { - return ResponseEntity.badRequest().body( - new ErrorResponse("400", e.getMessage()) - ); - } - -} diff --git a/backend/src/main/java/com/ai/lawyer/global/qdrant/entity/Qdrent.java b/backend/src/main/java/com/ai/lawyer/global/qdrant/entity/Qdrent.java deleted file mode 100644 index 9dfa33b9..00000000 --- a/backend/src/main/java/com/ai/lawyer/global/qdrant/entity/Qdrent.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.ai.lawyer.global.qdrant.entity; - -import jakarta.persistence.*; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - - -@Entity 
-@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -@Table(name = "qdrent") -public class Qdrent { - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - private Long PointsCount; - -} diff --git a/backend/src/main/java/com/ai/lawyer/global/qdrant/initializer/QdrantInitializer.java b/backend/src/main/java/com/ai/lawyer/global/qdrant/initializer/QdrantInitializer.java new file mode 100644 index 00000000..db17e4d9 --- /dev/null +++ b/backend/src/main/java/com/ai/lawyer/global/qdrant/initializer/QdrantInitializer.java @@ -0,0 +1,46 @@ +package com.ai.lawyer.global.qdrant.initializer; + +import io.qdrant.client.QdrantClient; +import jakarta.annotation.PostConstruct; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.util.concurrent.ExecutionException; + +@Slf4j +@Component +@RequiredArgsConstructor +public class QdrantInitializer { + + private final QdrantClient qdrantClient; + + @Value("${spring.ai.vectorstore.qdrant.collection-name}") + private String collectionName; + + @Value("${spring.ai.vectorstore.qdrant.vector-size}") + private Long vectorSize; + + @PostConstruct + private void existQdrantCollection() throws InterruptedException, ExecutionException { + var collections = qdrantClient.listCollectionsAsync().get(); + boolean collectionExists = collections.stream() + .anyMatch(collection -> collection.equals(collectionName)); + + if (!collectionExists) { + log.info("'{}' 컬렉션이 존재하지 않아 새로 생성 중", collectionName); + qdrantClient.createCollectionAsync( + collectionName, + io.qdrant.client.grpc.Collections.VectorParams.newBuilder() + .setSize(vectorSize.intValue()) + .setDistance(io.qdrant.client.grpc.Collections.Distance.Cosine) + .build() + ).get(); + log.info("'{}' 컬렉션 생성 완료", collectionName); + } else { + log.info("'{}' 컬렉션이 이미 존재합니다.", collectionName); + } + } + +} \ No newline at 
end of file diff --git a/backend/src/main/java/com/ai/lawyer/global/qdrant/loader/LawLoader.java b/backend/src/main/java/com/ai/lawyer/global/qdrant/loader/LawLoader.java deleted file mode 100644 index 5430a2e0..00000000 --- a/backend/src/main/java/com/ai/lawyer/global/qdrant/loader/LawLoader.java +++ /dev/null @@ -1,193 +0,0 @@ -package com.ai.lawyer.global.qdrant.loader; - -import com.ai.lawyer.domain.law.entity.*; -import com.ai.lawyer.domain.law.repository.*; -import com.ai.lawyer.domain.precedent.repository.PrecedentRepository; -import com.ai.lawyer.global.qdrant.entity.Qdrent; -import com.ai.lawyer.global.qdrant.repository.QdrantRepository; -import io.qdrant.client.QdrantClient; -import io.qdrant.client.grpc.Collections; -import jakarta.annotation.PostConstruct; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.ai.document.Document; -import org.springframework.ai.transformer.splitter.TextSplitter; -import org.springframework.ai.vectorstore.VectorStore; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Component; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutionException; - -@Slf4j -@Component -@RequiredArgsConstructor -public class LawLoader { - - private final PrecedentRepository precedentRepository; - private final VectorStore vectorStore; - private final QdrantClient qdrantClient; - private final TextSplitter textSplitter; - - private final LawRepository lawRepository; - private final HangRepository hangRepository; - private final JoRepository joRepository; - private final JangRepository jangRepository; - private final HoRepository hoRepository; - private final QdrantRepository qdrantRepository; - - - @Value("${spring.ai.vectorstore.qdrant.collection-name}") - private String collectionName; - - @Value("${spring.ai.vectorstore.qdrant.vector-size}") - private Long 
vectorSize; - - // 순서: - @PostConstruct - public void init() throws ExecutionException, InterruptedException { - - existQdrantCollection(); - - Qdrent qdrent = qdrantRepository.findById(1L).orElse( - Qdrent.builder().PointsCount(0L).build() - ); - - if (!verification(qdrent.getPointsCount())) { - return; - } - - //loadCasesIntoVectorStore(); 주석 풀기 금지 -> 과금 - //loadLawsIntoVectorStore(); 주석 풀기 금지 -> 과금 - - qdrent.setPointsCount(qdrantClient.getCollectionInfoAsync(collectionName).get().getPointsCount()); - - qdrantRepository.save(qdrent); - } - - public void loadCasesIntoVectorStore() { - log.info("판례 데이터 벡터화를 시작합니다..."); - - List documents = precedentRepository.findAll().stream() - .limit(5) - .flatMap(lawEntity -> { - Document originalDoc = new Document( - lawEntity.getPrecedentContent(), - Map.of("type", "판례", "caseNumber", lawEntity.getCaseNumber(), "court", lawEntity.getCourtName()) - ); - return textSplitter.apply(List.of(originalDoc)).stream(); - }).toList(); - - vectorStore.add(documents); - log.info("판례 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다.", documents.size()); - } - - public void loadLawsIntoVectorStore() { - log.info("법령 데이터 벡터화를 시작합니다..."); - List allChunks = new ArrayList<>(); - - List laws = lawRepository.findAll(); - int lawCount = 0; - for (Law law : laws) { - if (lawCount++ >= 10) break; - - List jangs = jangRepository.findByLaw(law); - int jangCount = 0; - for (Jang jang : jangs) { - if (jangCount++ >= 10) break; - - List jos = joRepository.findByJang(jang); - int joCount = 0; - for (Jo jo : jos) { - if (joCount++ >= 10) break; - - StringBuilder contentBuilder = new StringBuilder(); - - if (jo.getContent() != null && !jo.getContent().isBlank()) { - contentBuilder.append(jo.getContent()).append("\n"); - } - - List hangs = hangRepository.findByJo(jo); - int hangCount = 0; - for (Hang hang : hangs) { - if (hangCount++ >= 10) break; - - if (hang.getContent() != null && !hang.getContent().isBlank()) { - contentBuilder.append(hang.getContent()).append("\n"); - } 
- - List hos = hoRepository.findByHang(hang); - int hoCount = 0; - for (Ho ho : hos) { - if (hoCount++ >= 10) break; - - if (ho.getContent() != null && !ho.getContent().isBlank()) { - contentBuilder.append(ho.getContent()).append("\n"); - } - } - } - - Map metadata = new HashMap<>(); - metadata.put("type", "법령"); - metadata.put("lawName", law.getLawName()); - - Document originalDoc = new Document(contentBuilder.toString(), metadata); - List chunks = textSplitter.apply(List.of(originalDoc)); - allChunks.addAll(chunks); - } - } - } - - vectorStore.add(allChunks); - log.info("법령 데이터 {}건을 벡터 저장소에 성공적으로 저장했습니다.", allChunks.size()); - } - - private void existQdrantCollection() throws InterruptedException, ExecutionException { - // 현재 Qdrant에 있는 모든 컬렉션 목록을 가져옴 - var collections = qdrantClient.listCollectionsAsync().get(); - boolean collectionExists = collections.stream() - .anyMatch(collection -> collection.equals(collectionName)); - - // 만약 컬렉션이 없다면, 새로 생성 - if (!collectionExists) { - log.info("'{}' 컬렉션이 존재하지 않아 새로 생성중", collectionName); - qdrantClient.createCollectionAsync( - collectionName, - Collections.VectorParams.newBuilder() - .setSize(vectorSize) // yml에 설정된 벡터 크기 - .setDistance(Collections.Distance.Cosine) // 가장 일반적인 거리 측정 방식 - .build() - ).get(); - log.info("'{}' 컬렉션 생성을 완료했습니다.", collectionName); - } else { - log.info("'{}' 컬렉션이 이미 존재합니다.", collectionName); - } - } - - private boolean verification(Long count) throws ExecutionException, InterruptedException { - - if (lawRepository.count() == 0) { - log.warn("데이터베이스에 법령 데이터가 없습니다. data.sql을 확인하세요."); - return false; - } - - if (precedentRepository.count() == 0) { - log.warn("데이터베이스에 판례 데이터가 없습니다. 
data.sql을 확인하세요."); - return false; - } - - if (count == 0) { - return true; - } - - if (qdrantClient.getCollectionInfoAsync(collectionName).get().getPointsCount() == count) { - log.info("Qdrant 벡터 저장소에 이미 모든 데이터가 존재합니다."); - return false; - } - - return true; - } -} \ No newline at end of file diff --git a/backend/src/main/java/com/ai/lawyer/global/qdrant/repository/QdrantRepository.java b/backend/src/main/java/com/ai/lawyer/global/qdrant/repository/QdrantRepository.java deleted file mode 100644 index 59b82ba3..00000000 --- a/backend/src/main/java/com/ai/lawyer/global/qdrant/repository/QdrantRepository.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.ai.lawyer.global.qdrant.repository; - -import com.ai.lawyer.global.qdrant.entity.Qdrent; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.stereotype.Repository; - -@Repository -public interface QdrantRepository extends JpaRepository { -} diff --git a/backend/src/main/java/com/ai/lawyer/global/qdrant/service/QdrantService.java b/backend/src/main/java/com/ai/lawyer/global/qdrant/service/QdrantService.java index 7be6a003..526af9e7 100644 --- a/backend/src/main/java/com/ai/lawyer/global/qdrant/service/QdrantService.java +++ b/backend/src/main/java/com/ai/lawyer/global/qdrant/service/QdrantService.java @@ -7,8 +7,11 @@ import org.springframework.ai.vectorstore.filter.Filter; import org.springframework.stereotype.Service; +import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.List; +import java.util.stream.Collectors; @Service @RequiredArgsConstructor @@ -16,26 +19,52 @@ public class QdrantService { private final VectorStore vectorStore; - public List searchDocument(String query, String key, String value, int topK) { + public List searchDocument(String query, String key, String value) { - SearchRequest caseSearchRequest = SearchRequest.builder() - .query(query) - .topK(topK) + SearchRequest findCaseNumberRequest = 
SearchRequest.builder() + .query(query).topK(1) .filterExpression(new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key(key), new Filter.Value(value))) .build(); - List similarCaseDocuments = vectorStore.similaritySearch(caseSearchRequest); - - if (caseSearchRequest == null) { - return Collections.singletonList( - Document.builder() - .text("더미") - .metadata(key, value) - .score(0.0) - .build() - ); + List mostSimilarDocuments = vectorStore.similaritySearch(findCaseNumberRequest); + + + if (mostSimilarDocuments.isEmpty()) { + return Collections.emptyList(); + } + String targetCaseNumber = (String) mostSimilarDocuments.get(0).getMetadata().get("caseNumber"); + if (targetCaseNumber == null) { + return mostSimilarDocuments; + } + + SearchRequest fetchAllChunksRequest = SearchRequest.builder() + .query(query).topK(100) + .filterExpression(new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key("caseNumber"), new Filter.Value(targetCaseNumber))) + .build(); + List allChunksOfCase = new ArrayList<>(vectorStore.similaritySearch(fetchAllChunksRequest)); + + if (allChunksOfCase.isEmpty()) { + return Collections.emptyList(); } - return similarCaseDocuments; + allChunksOfCase.sort(Comparator.comparingInt(doc -> + ((Number) doc.getMetadata().get("chunkIndex")).intValue() + )); + + String mergedContent = allChunksOfCase.stream() + .map(Document::getText) + .collect(Collectors.joining("")); + + Document bestScoringDoc = allChunksOfCase.stream() + .max(Comparator.comparing(Document::getScore)) + .orElse(allChunksOfCase.get(0)); + + Document finalDocument = Document.builder() + .text(mergedContent) + .metadata(bestScoringDoc.getMetadata()) + .score(bestScoringDoc.getScore()) + .build(); + + return Collections.singletonList(finalDocument); } } \ No newline at end of file diff --git a/backend/src/main/resources/application-dev.yml b/backend/src/main/resources/application-dev.yml index 454c3122..9320def5 100644 --- a/backend/src/main/resources/application-dev.yml +++ 
b/backend/src/main/resources/application-dev.yml @@ -16,6 +16,18 @@ spring: port: ${DEV_REDIS_PORT} password: ${DEV_REDIS_PASSWORD} embedded: false + + batch: + job: + enabled: false # 최소 한번 시작 + jdbc: + initialize-schema: always + + datasource-meta: + driver-class-name: com.mysql.cj.jdbc.Driver + jdbc-url: jdbc:mysql://localhost:3306/meta_db?useSSL=false&useUnicode=true&serverTimezone=Asia/Seoul&allowPublicKeyRetrieval=true&createDatabaseIfNotExist=true + username: root + password: balaw security: oauth2: client: diff --git a/backend/src/main/resources/application.yml b/backend/src/main/resources/application.yml index 5cbf4b72..fb9187f4 100644 --- a/backend/src/main/resources/application.yml +++ b/backend/src/main/resources/application.yml @@ -7,7 +7,7 @@ spring: - org.springframework.boot.autoconfigure.session.SessionAutoConfiguration config: - import: optional:file:.env[.properties], classpath:system-prompt.yml + import: optional:file:.env[.properties], classpath:system-prompt.yml, classpath:batch-pagsize.yml application: name: back @@ -35,36 +35,37 @@ spring: writetimeout: 5000 auth-code-expiration-millis: 1800000 - ai: openai: api-key: ${OPENAI_API_KEY} chat: options: model: gpt-4.1-nano + + ollama: + base-url: http://localhost:11434 embedding: options: - model: text-embedding-3-small - - chat: - memory: - repository: - jdbc: - initialize-schema: never + model: daynice/kure-v1:567m vectorstore: qdrant: host: ${SPRING_AI_VECTORSTORE_QDRANT_HOST} port: ${SPRING_AI_VECTORSTORE_QDRANT_PORT} collection-name: "legal_cases" - vector-size: 1536 + vector-size: 1024 + + chat: + memory: + repository: + jdbc: + initialize-schema: never jpa: show-sql: true hibernate: ddl-auto: ${SPRING_JPA_HIBERNATE_DDL_AUTO} - # naming: - # physical-strategy: org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + properties: hibernate: use_sql_comments: true diff --git a/backend/src/main/resources/batch-pagsize.yml b/backend/src/main/resources/batch-pagsize.yml new file mode 
100644 index 00000000..a6378431 --- /dev/null +++ b/backend/src/main/resources/batch-pagsize.yml @@ -0,0 +1,7 @@ +batch: + page: + size: + precedent: 1 # 판례 개수 + law: 1 # 법령 개수 + scheduler: + run-every-minute: false # 매 분마다 스케줄러 실행 여부 \ No newline at end of file diff --git a/backend/src/main/resources/data.sql b/backend/src/main/resources/data.sql index ae5f2eb5..063a19bb 100644 --- a/backend/src/main/resources/data.sql +++ b/backend/src/main/resources/data.sql @@ -1,33 +1,2 @@ --- MySQL 용 -/*CREATE TABLE IF NOT EXISTS SPRING_AI_CHAT_MEMORY ( - `conversation_id` VARCHAR(36) NOT NULL, - `content` TEXT NOT NULL, - `type` ENUM('USER', 'ASSISTANT', 'SYSTEM', 'TOOL') NOT NULL, - `timestamp` TIMESTAMP NOT NULL, - - INDEX `SPRING_AI_CHAT_MEMORY_CONVERSATION_ID_TIMESTAMP_IDX` (`conversation_id`, `timestamp`) - );*/ - --- H2 용 -/*CREATE TABLE IF NOT EXISTS SPRING_AI_CHAT_MEMORY -( - conversation_id - VARCHAR -( - 36 -) NOT NULL, - content TEXT NOT NULL, - type VARCHAR -( - 10 -) NOT NULL, - "timestamp" TIMESTAMP NOT NULL, - PRIMARY KEY -( - conversation_id, - "timestamp" -) - ); - -CREATE INDEX IF NOT EXISTS SPRING_AI_CHAT_MEMORY_CONVERSATION_ID_TIMESTAMP_IDX - ON SPRING_AI_CHAT_MEMORY(conversation_id, "timestamp");*/ \ No newline at end of file +/* +*/ \ No newline at end of file diff --git a/backend/src/main/resources/system-prompt.yml b/backend/src/main/resources/system-prompt.yml index 727c7853..901ae80d 100644 --- a/backend/src/main/resources/system-prompt.yml +++ b/backend/src/main/resources/system-prompt.yml @@ -1,7 +1,7 @@ custom: ai: system-message: | - 귀하는 대한민국의 법률 전문가이자 유능한 변호사입니다. + 귀하는 대한민국의 유능한 변호사입니다. 당신의 임무는 사용자의 질문에 대해 아래 '관련 법령 정보'와 '관련 판례 정보'를 최대한 창의적이고 적극적으로 활용하여 답변하는 것입니다. --- 관련 법령 정보 --- @@ -13,13 +13,35 @@ custom: --- **답변 생성 지침:** - (중요) 당신은 변호사 역할을 하며, 사용자가 불필요한 답변을 요구할 경우 반드시 "해당 질문은 법과 관련된 내용이 아니기 때문에 답변드리기 어렵습니다."라고 답변하도록 하십시오. 
+ + (매우 중요) **사용자의 질문을 최대한 법률적 관점에서 해석해야 합니다.** + 예를 들어, '코인 사기', 'OO투자 리딩방 피해'와 같이 신종 금융/기술 분야와 결합된 문제라도, 그 본질이 **'사기', '계약 불이행', '자본시장법 위반' 등 법률적 쟁점에 해당한다면 반드시 법률 문제로 간주하고 답변해야 합니다.** + 사용자의 질문이 법률과 명백하게 관련 없는 경우(예: 요리법 질문, 날씨 문의 등)에는 "해당 질문은 법과 관련된 내용이 아니기 때문에 답변드리기 어렵습니다."라고 답변하십시오. + 1. **법령과 판례 우선 활용**: 답변은 반드시 '관련 법령 정보'와 '관련 판례 정보'에 명시된 사실과 법리를 최우선 근거로 삼아야 합니다. 어떤 법령의 몇 조, 어떤 판례를 참고했는지 언급하며 답변하면 신뢰도를 높일 수 있습니다. 2. **적극적 추론**: 제공된 정보가 사용자의 질문과 완벽하게 일치하지 않더라도, 법령의 취지나 판례에 나타난 법률적 원칙을 바탕으로 사용자의 질문에 대한 법리적 해석을 추론하여 답변해주십시오. 3. **전문가적이고 상세한 설명**: 권위 있는 어조로 법률적 근거를 명확히 제시하며, 일반인도 이해하기 쉽게 상세히 설명해야 합니다. - 4. **답변 불가 시**: 위 모든 노력에도 불구하고, 질문이 법률과 전혀 무관하거나 제공된 정보로 도저히 답변을 생성할 수 없는 경우에만, '귀하께서 유사한 판례와 법률이 없어 정확한 답변을 드리기 어렵습니다.' 라고 답변해주십시오. - 5. **필수 조언**: 답변의 마지막에는 사용자에게 실질적인 도움이 될 수 있는 조언을 반드시 포함해주십시오. - 6. **지식 활용 허용**: 제공된 법령과 판례가 내용과 다르더라도, 귀하의 전문 지식과 상식을 활용하여 답변해도 좋습니다. + 4. **필수 조언**: 답변의 마지막에는 사용자에게 실질적인 도움이 될 수 있는 조언을 반드시 포함해주십시오. + 5. **지식 활용 허용**: 제공된 법령과 판례가 내용과 다르더라도, 귀하의 전문 지식과 상식을 활용하여 답변해도 좋습니다. title-extraction: "다음 문장의 핵심을 요약하여 간결한 제목을 만들어줘: " - keyword-extraction: "여기서 법과 관련된 핵심 키워드 1~2개를 뽑아줘. (예: 뺑소니, 이혼, 학교폭력): " \ No newline at end of file + keyword-extraction: | + 1. 오직 가장 중요하고 대표적인 키워드 **하나**만 추출해야 합니다. + 2. 절대로 콤마(,)로 구분된 여러 키워드를 하나의 문자열로 합쳐서 만들면 안 됩니다. + 3. 응답은 반드시 아래 JSON 형식에 맞춰 'keywords' 리스트에 **단 하나의 문자열**만 포함해야 합니다. + + --- + **예시 1:** + - 입력 문장: 아파트 층간소음 문제로 다투던 중 이웃을 폭행하여 상해를 입혔습니다. + - 올바른 JSON 응답: + { + "keywords": ["폭행"] + } + - 잘못된 JSON 응답: + { + "keywords": ["층간소음, 폭행, 상해"] + } + --- + + 이제 아래 문장에서 위의 규칙을 엄격히 준수하여 핵심 키워드 **단 하나**를 추출해주세요: + """; \ No newline at end of file