Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3da7c8a
improvement: remove redundant operations and setup variables for limi…
DuGuYifei Jul 13, 2025
0046bcc
feat: add feat of normal qa in genai service
DuGuYifei Jul 16, 2025
cb2bfbc
fix: k8s-install.sh key
DuGuYifei Jul 16, 2025
7462fc6
fix: is_open_rag
DuGuYifei Jul 16, 2025
fb023c6
feat: ask question about company with rag
DuGuYifei Jul 16, 2025
e64c086
feat: json log in springboot services
DuGuYifei Jul 16, 2025
599c047
feat: json log in genai
DuGuYifei Jul 16, 2025
a53da69
feat: json for log format in promtail
DuGuYifei Jul 17, 2025
cbe5fa3
ci: auto scaling
DuGuYifei Jul 17, 2025
9b5a6fc
fix: database connection with genai-services
DuGuYifei Jul 17, 2025
f152001
ci: fix alertmanager datasource in grafana
DuGuYifei Jul 17, 2025
004cd9d
ci: fix alertmanager implementation in grafan and allow handle grafan…
DuGuYifei Jul 17, 2025
50ef612
ci: improve promtail pipeline
DuGuYifei Jul 17, 2025
5b8812e
feat: log in python same with springboot
DuGuYifei Jul 17, 2025
14d75cc
fix: module import name in main.py
DuGuYifei Jul 17, 2025
f20b188
ci: fix json in promtail
DuGuYifei Jul 17, 2025
9680d0b
ci: fix auto scaling
DuGuYifei Jul 18, 2025
399f0b1
fix: db secret
DuGuYifei Jul 18, 2025
aa97eb4
feat: add tests and put into cicd
DuGuYifei Jul 18, 2025
f96b0a1
ci: fix unit test in first step
DuGuYifei Jul 18, 2025
ba8f8c6
ci: give permission to gradlew in test
DuGuYifei Jul 18, 2025
6dd2265
ci: give permission to protobuf
DuGuYifei Jul 18, 2025
b673dd3
ci: update gradlew pipeline version in cicd
DuGuYifei Jul 18, 2025
3527a8b
fix: docker compose files
DuGuYifei Jul 18, 2025
984b99e
ci: fix aws
DuGuYifei Jul 18, 2025
6f0ec71
ci: use tag input for aws
DuGuYifei Jul 18, 2025
1dc48bc
ci: fix secrets of aws
DuGuYifei Jul 18, 2025
25efe48
ci: try fix disk space error
DuGuYifei Jul 18, 2025
03e784b
ci: fix aws
DuGuYifei Jul 18, 2025
a5eaa6c
fix: init.sql
DuGuYifei Jul 18, 2025
0fb7a20
fix: https in docker compose deploy and ansible and terraform with sc…
DuGuYifei Jul 18, 2025
3def359
fix: docker compose deploy aws
DuGuYifei Jul 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions .github/workflows/deploy-aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ name: Deploy to AWS

on:
workflow_dispatch:
inputs:
image:
description: "Image name and tag"
required: false

jobs:
deploy:
Expand All @@ -16,7 +20,7 @@ jobs:
uses: appleboy/scp-action@v0.1.7
with:
host: ${{ vars.EC2_PUBLIC_IP }}
username: ${{ vars.AWS_EC2_USER }}
username: ${{ secrets.AWS_EC2_USER }}
key: ${{ secrets.SERVER_SSH_KEY }}
source: "./docker-compose.prod.deploy.yml"
target: ~/srv/app/
Expand All @@ -26,18 +30,20 @@ jobs:
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ vars.EC2_PUBLIC_IP }}
username: ${{ vars.AWS_EC2_USER }}
username: ${{ secrets.AWS_EC2_USER }}
key: ${{ secrets.SERVER_SSH_KEY }}
script: |
cat > ~/srv/app/.env <<EOF
# GitHub Container Registry
GHCR_USER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
GHCR_REPO=$(echo '${{ github.event.repository.name }}' | tr '[:upper:]' '[:lower:]')
GHCR_TAG=$(
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
echo "${{ github.sha }}"
if [[ -n "${{ github.event.inputs.image }}" ]]; then
echo "${{ github.event.inputs.image }}"
elif [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
echo "${{ github.sha }}"
else
echo "dev-${{ github.sha }}"
echo "dev-${{ github.sha }}"
fi
)
DB_USERNAME=${{ secrets.DB_USERNAME }}
Expand All @@ -55,16 +61,19 @@ jobs:
SPRING_PROFILES_ACTIVE=docker
PYTHONDONTWRITEBYTECODE=1
PYTHONUNBUFFERED=1
OLLAMA_BASE_URL="${{ secrets.OLLAMA_BASE_URL }}"
OLLAMA_MODEL="${{ vars.OLLAMA_MODEL }}"
OLLAMA_API_KEY="${{ secrets.OLLAMA_API_KEY }}"
EOF

# SSH - pull images and restart containers
- name: Pull images and restart containers remotely
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ vars.EC2_PUBLIC_IP }}
username: ${{ vars.AWS_EC2_USER }}
username: ${{ secrets.AWS_EC2_USER }}
key: ${{ secrets.SERVER_SSH_KEY }}
script: |
cd ~/srv/app
docker compose -f docker-compose.prod.deploy.yml --env-file .env pull
docker compose -f docker-compose.prod.deploy.yml --env-file .env up -d
docker compose -f docker-compose.prod.deploy.yml pull
docker compose -f docker-compose.prod.deploy.yml up -d
28 changes: 26 additions & 2 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,40 @@ on:
branches:
- main
workflow_dispatch:
# 如果输入image tag,就跳过build
# If an image tag is supplied as input, skip the unit tests and the build.
inputs:
image:
description: "Image name and tag"
required: false

jobs:
build-and-push:
unit-test:
if: ${{ github.event.inputs.image == '' || github.event.inputs.image == null }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Setup Gradle
uses: gradle/actions/setup-gradle@v4.4.1

- name: Set up JDK
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '21'

- name: Make gradlew executable
run: chmod +x gradlew

- name: Make protobuf executable
run: chmod +x service-application/protoc-gen-grpc-java-1.73.0-linux-x86_64.exe

- name: Run tests for all modules
run: ./gradlew test

build-and-push:
needs: unit-test
runs-on: ubuntu-latest

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions ansible/ansible.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ansible-playbook -i hosts.ini deploy.yml
32 changes: 16 additions & 16 deletions ansible/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
update_cache: yes
when: ansible_os_family == "Debian"

- name: Install Docker and Docker Compose plugin (for Ubuntu)
apt:
name:
- docker.io
- docker-compose-plugin
state: present
- name: Install Docker using official script (for Ubuntu)
shell: |
curl -fsSL https://get.docker.com | bash
args:
executable: /bin/bash
when: ansible_os_family == "Debian"
become: true

- name: Add the user to the docker group
user:
Expand All @@ -28,14 +28,14 @@
state: directory
mode: '0755'

- name: Copy .env to server
copy:
src: .env
dest: /srv/app/.env
mode: '0600'
- name: Create letsencrypt
file:
path: /srv/app/letsencrypt
state: directory
mode: '0755'

- name: Copy docker-compose.prod.yml to server
copy:
src: docker-compose.prod.yml
dest: /srv/app/docker-compose.prod.yml
mode: '0644'
- name: Create acme.json
file:
path: /srv/app/letsencrypt/acme.json
state: touch
mode: '0600'
2 changes: 2 additions & 0 deletions ansible/hosts.ini.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aws]
xxx.xxx.xxx.xxx ansible_user=xxx ansible_ssh_private_key_file=/home/xxx/.pem_temp/xxx.pem
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ subprojects {
implementation 'org.springframework.boot:spring-boot-starter-actuator'
implementation 'io.micrometer:micrometer-registry-prometheus:1.15.1'

implementation("net.logstash.logback:logstash-logback-encoder:8.1")

developmentOnly 'org.springframework.boot:spring-boot-devtools'
annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor'

Expand Down
43 changes: 19 additions & 24 deletions docker-compose.prod.deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- "--entrypoints.web.http.redirections.entrypoint.to=websecure"
- "--entrypoints.web.http.redirections.entrypoint.scheme=https"
- "--providers.docker=true"
- "--providers.docker.exposedByDefault=false"
- "--providers.docker.exposedbydefault=false"
- "--certificatesresolvers.letsencrypt.acme.httpchallenge=true"
- "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web"
- "--certificatesresolvers.letsencrypt.acme.email=admin@aihr.com"
Expand All @@ -20,16 +20,11 @@ services:
ports:
- "80:80" # HTTP
- "443:443" # HTTPS
- "8080:8080" # Traefik dashboard
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- /home/ubuntu/srv/app/letsencrypt:/letsencrypt
networks:
- ai-hr-network
labels:
- "traefik.enable=true"
- "traefik.http.routers.traefik.rule=Host(`traefik.localhost`)"
- "traefik.http.routers.traefik.service=api@internal"
- "traefik.http.routers.traefik.entrypoints=web"

# PostgreSQL database
postgresql:
Expand All @@ -48,7 +43,7 @@ services:
test:
[
"CMD-SHELL",
"PGPASSWORD=$$POSTGRES_PASSWORD psql -U $$POSTGRES_USER -d ai_db -tAc \"SELECT to_regclass('public.embeddings');\" | grep -q embeddings"
"PGPASSWORD=$$POSTGRES_PASSWORD psql -U $$POSTGRES_USER -d postgres -tAc \"SELECT 1 FROM pg_database WHERE datname='ai_db';\" | grep -q 1"
]
interval: 10s
timeout: 5s
Expand Down Expand Up @@ -89,8 +84,8 @@ services:
retries: 3
labels:
- "traefik.enable=true"
- "traefik.http.routers.auth.rule=Host(`localhost`) && PathPrefix(`/api/v1/auth`)"
- "traefik.http.routers.auth.entrypoints=web"
- "traefik.http.routers.auth.rule=Host(`ec2-34-236-4-221.compute-1.amazonaws.com`) && PathPrefix(`/api/v1/auth`)"
- "traefik.http.routers.auth.entrypoints=websecure"
- "traefik.http.services.auth.loadbalancer.server.port=8080"

# Job management service
Expand All @@ -100,8 +95,6 @@ services:
depends_on:
postgresql:
condition: service_healthy
service-auth:
condition: service_healthy
networks:
- ai-hr-network
environment:
Expand All @@ -128,8 +121,8 @@ services:
retries: 3
labels:
- "traefik.enable=true"
- "traefik.http.routers.job.rule=Host(`localhost`) && PathPrefix(`/api/v1/jobs`)"
- "traefik.http.routers.job.entrypoints=web"
- "traefik.http.routers.job.rule=Host(`ec2-34-236-4-221.compute-1.amazonaws.com`) && PathPrefix(`/api/v1/jobs`)"
- "traefik.http.routers.job.entrypoints=websecure"
- "traefik.http.services.job.loadbalancer.server.port=8081"

# Application management service
Expand All @@ -139,10 +132,6 @@ services:
depends_on:
postgresql:
condition: service_healthy
service-auth:
condition: service_healthy
service-job:
condition: service_healthy
networks:
- ai-hr-network
environment:
Expand Down Expand Up @@ -174,8 +163,8 @@ services:
retries: 3
labels:
- "traefik.enable=true"
- "traefik.http.routers.application.rule=Host(`localhost`) && (PathPrefix(`/api/v1/applications`) || PathPrefix(`/api/v1/chat`) || PathPrefix(`/api/v1/assessments`) || PathPrefix(`/api/v1/files`))"
- "traefik.http.routers.application.entrypoints=web"
- "traefik.http.routers.application.rule=Host(`ec2-34-236-4-221.compute-1.amazonaws.com`) && (PathPrefix(`/api/v1/applications`) || PathPrefix(`/api/v1/chat`) || PathPrefix(`/api/v1/assessments`) || PathPrefix(`/api/v1/files`))"
- "traefik.http.routers.application.entrypoints=websecure"
- "traefik.http.services.application.loadbalancer.server.port=8082"

# GenAI service
Expand All @@ -190,8 +179,12 @@ services:
environment:
- PYTHONDONTWRITEBYTECODE=1
- PYTHONUNBUFFERED=1
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL} \
- OLLAMA_MODEL=${OLLAMA_MODEL} \
- DB_HOST=${DB_HOST:-postgresql}
- DB_PORT=${DB_PORT:-5432}
- DB_USERNAME=${DB_USERNAME:-postgres}
- DB_PASSWORD=${DB_PASSWORD:-postgres}
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- OLLAMA_API_KEY=${OLLAMA_API_KEY}

# Frontend
Expand All @@ -202,8 +195,10 @@ services:
- ai-hr-network
labels:
- "traefik.enable=true"
- "traefik.http.routers.frontend.rule=Host(`localhost`)"
- "traefik.http.routers.frontend.entrypoints=web"
- "traefik.http.routers.frontend.rule=Host(`ec2-34-236-4-221.compute-1.amazonaws.com`)"
- "traefik.http.routers.frontend.entrypoints=websecure"
- "traefik.http.routers.frontend.tls=true"
- "traefik.http.routers.frontend.tls.certresolver=letsencrypt"
- "traefik.http.services.frontend.loadbalancer.server.port=80"

networks:
Expand Down
6 changes: 5 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ services:
test:
[
"CMD-SHELL",
"PGPASSWORD=$$POSTGRES_PASSWORD psql -U $$POSTGRES_USER -d ai_db -tAc \"SELECT to_regclass('public.embeddings');\" | grep -q embeddings"
"PGPASSWORD=$$POSTGRES_PASSWORD psql -U $$POSTGRES_USER -d postgres -tAc \"SELECT 1 FROM pg_database WHERE datname='ai_db';\" | grep -q 1"
]
interval: 10s
timeout: 5s
Expand Down Expand Up @@ -186,6 +186,10 @@ services:
environment:
- PYTHONDONTWRITEBYTECODE=1
- PYTHONUNBUFFERED=1
- DB_HOST=${DB_HOST:-postgresql}
- DB_PORT=${DB_PORT:-5432}
- DB_USERNAME=${DB_USERNAME:-postgres}
- DB_PASSWORD=${DB_PASSWORD:-postgres}
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- OLLAMA_API_KEY=${OLLAMA_API_KEY}
Expand Down
4 changes: 0 additions & 4 deletions helm/aihr/charts/postgresql/templates/secret.yaml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ spec:
- name: application
image: "{{ default $defaultRepo .Values.image.repository }}:{{ default $tag .Values.image.tag }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
ports:
- name: http
containerPort: {{ .Values.service.port }}
Expand Down
22 changes: 22 additions & 0 deletions helm/aihr/charts/service-application/templates/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "service-application.fullname" . }}
labels:
app: {{ include "service-application.fullname" . }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "service-application.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
6 changes: 6 additions & 0 deletions helm/aihr/charts/service-application/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
replicaCount: 1

autoscaling:
enabled: true
minReplicas: 1
maxReplicas: 3
targetCPUUtilizationPercentage: 70

image:
# Default image reference when left empty: ghcr.io/<user>/<repo>-service-application:<tag>
repository: ""
Expand Down
7 changes: 7 additions & 0 deletions helm/aihr/charts/service-auth/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ spec:
- name: auth
image: "{{ default $defaultRepo .Values.image.repository }}:{{ default $tag .Values.image.tag }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
ports:
- name: http
containerPort: {{ .Values.service.port }}
Expand Down
Loading