Skip to content

Commit 44b1885

Browse files
authored
Merge pull request #154 from prgrms-web-devcourse-final-project/node/15
[REFACTOR]: 부정프롬프트 추가 및 정확도 향상
2 parents 080a224 + 97ba93f commit 44b1885

26 files changed

+2874
-703
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* 이 파일은 연령대+카테고리 테마 사전 엔티티를 정의한다.
3+
* 흐름: 테마 문자열/임베딩 보관 → min_age~max_age 및 category로 필터링
4+
*/
5+
package com.back.domain.search.entity;
6+
7+
import com.back.domain.node.entity.NodeCategory;
8+
import com.back.infra.pgvector.PgVectorConverter;
9+
import jakarta.persistence.*;
10+
import lombok.*;
11+
import org.hibernate.annotations.JdbcTypeCode;
12+
import org.hibernate.type.SqlTypes;
13+
14+
@Entity
15+
@Table(name = "age_theme")
16+
@Getter @Setter
17+
@NoArgsConstructor @AllArgsConstructor @Builder
18+
public class AgeTheme {
19+
20+
@Id
21+
@GeneratedValue(strategy = GenerationType.IDENTITY)
22+
@Column(name = "id")
23+
private Long id;
24+
25+
@Column(name = "min_age", nullable = false)
26+
private int minAge;
27+
28+
@Column(name = "max_age", nullable = false)
29+
private int maxAge;
30+
31+
@Enumerated(EnumType.STRING)
32+
@Column(name = "category", nullable = false, length = 32)
33+
private NodeCategory category;
34+
35+
@Column(name = "theme", nullable = false, columnDefinition = "text")
36+
private String theme;
37+
38+
@JdbcTypeCode(SqlTypes.OTHER)
39+
@Convert(converter = PgVectorConverter.class)
40+
@Column(name = "embedding", nullable = false, columnDefinition = "vector(768)")
41+
private float[] embedding;
42+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* [코드 흐름 요약]
3+
* - '도메인 용어 사전'을 pgvector로 관리하기 위한 엔티티.
4+
* - term(용어)와 embedding(vector)을 저장한다.
5+
* - 검색은 임베딩 유사도(<=>)로 수행한다.
6+
*/
7+
package com.back.domain.search.entity;
8+
9+
import com.back.infra.pgvector.PgVectorConverter;
10+
import jakarta.persistence.*;
11+
import lombok.*;
12+
import org.hibernate.annotations.JdbcTypeCode;
13+
import org.hibernate.type.SqlTypes;
14+
15+
@Entity
16+
@Table(name = "vocab_term")
17+
@Getter @Setter
18+
@NoArgsConstructor @AllArgsConstructor @Builder
19+
public class VocabTerm {
20+
21+
@Id @GeneratedValue(strategy = GenerationType.IDENTITY)
22+
private Long id;
23+
24+
@Column(name = "term", nullable = false, unique = true, length = 128)
25+
private String term;
26+
27+
@JdbcTypeCode(SqlTypes.OTHER)
28+
@Convert(converter = PgVectorConverter.class)
29+
@Column(name = "embedding", nullable = false, columnDefinition = "vector(768)")
30+
private float[] embedding;
31+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* [코드 흐름 요약]
3+
* - age 범위 + (선택) category로 필터하고 pgvector 유사도로 상위 K 테마를 가져온다.
4+
* - 시더 중복 방지용으로 기존 테마 문자열을 조회하는 메서드를 제공한다.
5+
*/
6+
package com.back.domain.search.repository;
7+
8+
import com.back.domain.node.entity.NodeCategory;
9+
import com.back.domain.search.entity.AgeTheme;
10+
import org.springframework.data.jpa.repository.JpaRepository;
11+
import org.springframework.data.jpa.repository.Query;
12+
import org.springframework.data.repository.query.Param;
13+
14+
import java.util.List;
15+
16+
public interface AgeThemeRepository extends JpaRepository<AgeTheme, Long> {
17+
18+
// next 노드 생성
19+
@Query(value = """
20+
SELECT theme FROM age_theme
21+
WHERE :age BETWEEN min_age AND max_age
22+
AND (:cat IS NULL OR category = :cat)
23+
ORDER BY embedding <=> CAST(:q AS vector)
24+
LIMIT :k
25+
""", nativeQuery = true)
26+
List<String> topKThemesByAgeAndCategory(
27+
@Param("age") int age,
28+
@Param("cat") String categoryOrNull, // Enum이면 cat.name()으로 전달
29+
@Param("q") String vectorLiteral,
30+
@Param("k") int k
31+
);
32+
33+
// 무결성 검증
34+
long countByMinAge(int minAge);
35+
long countByMinAgeAndCategory(int minAge, NodeCategory category);
36+
37+
// 중복 방지용(시더에서 필요할 때만 사용)
38+
@Query("select a.theme from AgeTheme a where a.minAge = :minAge and a.category = :category")
39+
List<String> findThemesByMinAgeAndCategory(@Param("minAge") int minAge,
40+
@Param("category") NodeCategory category);
41+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* [코드 흐름 요약]
3+
* - 쿼리 임베딩과 가까운 용어를 유사도(<=>) 순으로 상위 K개 조회한다.
4+
* - 임베딩은 문자열 리터럴(CAST(:q AS vector))로 전달한다.
5+
*/
6+
package com.back.domain.search.repository;
7+
8+
import com.back.domain.search.entity.VocabTerm;
9+
import org.springframework.data.jpa.repository.*;
10+
import org.springframework.data.repository.query.Param;
11+
12+
import java.util.List;
13+
14+
public interface VocabTermRepository extends JpaRepository<VocabTerm, Long> {
15+
16+
@Query(value = """
17+
SELECT * FROM vocab_term
18+
ORDER BY embedding <=> CAST(:q AS vector)
19+
LIMIT :k
20+
""", nativeQuery = true)
21+
List<VocabTerm> searchTopK(
22+
@Param("q") String vectorLiteral,
23+
@Param("k") int k
24+
);
25+
26+
@Query(value = """
27+
SELECT term FROM vocab_term
28+
ORDER BY embedding <=> CAST(:q AS vector)
29+
LIMIT :k
30+
""", nativeQuery = true)
31+
List<String> searchTopKTerms(
32+
@Param("q") String vectorLiteral,
33+
@Param("k") int k
34+
);
35+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* [코드 흐름 요약]
3+
* - 최초 워밍업: 30초 간격 10회(≈5분) → 이후 5분 간격 전환(기존 유지).
4+
* - 각 틱마다 age 커서를 1씩 이동하며 ensureSeedForAgeAsync(age, MIN_PER_CATEGORY) 호출(기존 유지).
5+
* - 10만건 목표를 위해 MIN_PER_CATEGORY=120, 연령 구간을 3~120으로 확장.
6+
*/
7+
package com.back.global.ai.bootstrap;
8+
9+
import lombok.RequiredArgsConstructor;
10+
import org.springframework.context.annotation.Profile;
11+
import org.springframework.scheduling.annotation.Scheduled;
12+
import org.springframework.stereotype.Component;
13+
14+
import java.util.concurrent.atomic.AtomicInteger;
15+
16+
@Component
17+
@Profile("dev")
18+
@RequiredArgsConstructor
19+
public class AgeThemePreseedScheduler {
20+
21+
private final AgeThemeSeeder seeder;
22+
23+
// 무결성 검증
24+
private static final int WARMUP_RUNS = 10; // 30초 간격으로 10회
25+
private static final int MIN_PER_CATEGORY = 120; // 카테고리당 최소 시드 개수(확대)
26+
private static final int MIN_AGE = 3; // 순회 시작 연령(확장)
27+
private static final int MAX_AGE = 120; // 순회 종료 연령(확장)
28+
29+
private final AtomicInteger warmupCount = new AtomicInteger(0);
30+
private final AtomicInteger ageCursor = new AtomicInteger(MIN_AGE);
31+
32+
// next 노드 생성
33+
@Scheduled(initialDelay = 5_000, fixedRate = 30_000) // 첫 실행 5초 후, 30초 주기
34+
public void warmupPhase() {
35+
int n = warmupCount.get();
36+
if (n >= WARMUP_RUNS) return; // 무결성 검증
37+
tick();
38+
warmupCount.incrementAndGet();
39+
}
40+
41+
// 무결성 검증
42+
@Scheduled(initialDelay = 5 * 60_000, fixedRate = 5 * 60_000) // 5분 주기
43+
public void steadyPhase() {
44+
if (warmupCount.get() < WARMUP_RUNS) return; // 워밍업 완료 전엔 대기
45+
tick();
46+
}
47+
48+
// next 노드 생성
49+
private void tick() {
50+
int age = nextAge();
51+
seeder.ensureSeedForAgeAsync(age, MIN_PER_CATEGORY);
52+
}
53+
54+
// 무결성 검증
55+
private int nextAge() {
56+
int cur = ageCursor.getAndUpdate(v -> (v >= MAX_AGE) ? MIN_AGE : (v + 1));
57+
return cur;
58+
}
59+
}

0 commit comments

Comments
 (0)