-
Notifications
You must be signed in to change notification settings - Fork 515
Adding the complete architecture for search benchmarking #2740
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 31 commits
9c9ab67
9018d75
71b1e6b
7b828ab
c533f44
fbc226e
d750e7d
91d3001
044506c
56dc16a
3dd42ed
9beb5d2
8b99e46
3e1954d
fd7891b
5f9f074
825ed0c
72e616d
76a1201
6beae8c
7e8ea88
4ccb192
da1f773
73144dc
0865207
8938733
b7c2599
77d08f2
ef382cb
0f32b37
2d08814
7ca4173
cfe1608
835513d
2b685cf
f4eaeda
6a28a67
341fc76
dfbdb29
17348d1
dd2608e
dc68379
3f32001
8a4d747
f9212d8
f8c12de
ec80a78
373f3ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -213,6 +213,62 @@ jobs: | |||||||||||||
| with: | ||||||||||||||
| name: PDF build logs | ||||||||||||||
| path: ${{ github.workspace }}/latex-debug-logs | ||||||||||||||
| - name: Upload search index | ||||||||||||||
| if: ${{ matrix.format == 'html' }} | ||||||||||||||
| uses: actions/upload-artifact@v4 | ||||||||||||||
| with: | ||||||||||||||
| name: search-index | ||||||||||||||
| path: docs/build/search_index.js | ||||||||||||||
|
|
||||||||||||||
| benchmarks: | ||||||||||||||
| name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} | ||||||||||||||
| runs-on: ${{ matrix.os }} | ||||||||||||||
| needs: docs | ||||||||||||||
| strategy: | ||||||||||||||
| fail-fast: false | ||||||||||||||
| matrix: | ||||||||||||||
| version: | ||||||||||||||
| - '1' | ||||||||||||||
| os: | ||||||||||||||
| - ubuntu-latest | ||||||||||||||
| arch: | ||||||||||||||
| - x64 | ||||||||||||||
| steps: | ||||||||||||||
| - uses: actions/checkout@v4 | ||||||||||||||
| - uses: julia-actions/setup-julia@v2 | ||||||||||||||
| with: | ||||||||||||||
| version: ${{ matrix.version }} | ||||||||||||||
| arch: ${{ matrix.arch }} | ||||||||||||||
| show-versioninfo: true | ||||||||||||||
| - uses: julia-actions/cache@v2 | ||||||||||||||
| - uses: julia-actions/julia-buildpkg@v1 | ||||||||||||||
| - name: Download search index | ||||||||||||||
| uses: actions/download-artifact@v4 | ||||||||||||||
| with: | ||||||||||||||
| name: search-index | ||||||||||||||
| path: docs/build | ||||||||||||||
| - name: Build test examples | ||||||||||||||
| shell: julia --color=yes --project=test/examples {0} | ||||||||||||||
| run: | | ||||||||||||||
| using Pkg | ||||||||||||||
| Pkg.instantiate() | ||||||||||||||
| env: | ||||||||||||||
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||||||||||||
| - uses: actions/setup-node@v4 | ||||||||||||||
| with: | ||||||||||||||
| node-version: '20.x' | ||||||||||||||
| - name: Install Node.js dependencies | ||||||||||||||
| run: npm install | ||||||||||||||
| working-directory: test/search | ||||||||||||||
|
||||||||||||||
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20.x' | |
| - name: Install Node.js dependencies | |
| run: npm install | |
| working-directory: test/search |
Rahban1 marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
# Evaluation results for a single search query.
# Stores the derived precision/recall/F1 metrics together with the raw
# counts they were computed from, plus the expected and actual document sets.
struct QueryResult
    query::String            # The query string that was evaluated
    precision::Float64       # relevant_count / total_retrieved (0.0 when nothing was retrieved)
    recall::Float64          # relevant_count / total_relevant (1.0 when nothing was expected)
    f1::Float64              # Harmonic mean of precision and recall (0.0 when both are 0)
    expected::Vector{String} # Documents that should have been returned for the query
    actual::Vector{String}   # Documents actually returned by the search function
    # Raw integer values used in calculations
    relevant_count::Int # Number of relevant documents found
    total_retrieved::Int # Total number of documents retrieved
    total_relevant::Int # Total number of relevant documents
end
|
|
||
# Aggregates evaluation results across multiple search queries.
# The averages are unweighted means of the per-query metrics; the totals are
# sums of the corresponding raw counts from each QueryResult.
struct EvaluationResults
    individual_results::Vector{QueryResult} # Per-query results the aggregates were computed from
    average_precision::Float64              # Mean of QueryResult.precision over all queries
    average_recall::Float64                 # Mean of QueryResult.recall over all queries
    average_f1_score::Float64               # Mean of QueryResult.f1 over all queries
    # Raw integer values for overall evaluation
    total_relevant_found::Int # Total number of relevant documents found across all queries
    total_documents_retrieved::Int # Total number of documents retrieved across all queries
    total_relevant_documents::Int # Total number of relevant documents across all queries
end
|
|
||
# Calculates precision for search results against expected documents.
# Precision = (relevant documents found) / (total documents retrieved).
# Returns a tuple of (precision score, relevant-found count, retrieved count);
# an empty result set yields (0.0, 0, 0).
function calculate_precision(results, expected_docs)
    isempty(results) && return 0.0, 0, 0

    retrieved = length(results)
    hits = length(intersect(results, expected_docs))

    return hits / retrieved, hits, retrieved
end
|
|
||
# Calculates recall for search results against expected documents.
# Recall = (relevant documents found) / (total relevant documents), i.e. how
# complete the result set is. Returns a tuple of (recall score, found count,
# total relevant count); with no expected documents recall is trivially 1.0.
function calculate_recall(results, expected_docs)
    isempty(expected_docs) && return 1.0, 0, 0

    wanted = length(expected_docs)
    found = length(intersect(results, expected_docs))

    return found / wanted, found, wanted
end
|
|
||
# Calculates the F1 score from precision and recall values.
# F1 = 2 * (precision * recall) / (precision + recall), the harmonic mean,
# giving equal weight to both metrics. Returns 0.0 when both inputs are 0
# to avoid dividing by zero.
function calculate_f1(precision, recall)
    denominator = precision + recall
    return denominator == 0 ? 0.0 : 2 * (precision * recall) / denominator
end
|
|
||
# Evaluates a single search query using the provided search function.
# Runs `query.query` through `search_function`, scores the returned documents
# against `query.expected_docs`, and packages the precision/recall/F1 metrics
# together with the raw counts into a QueryResult.
function evaluate_query(search_function, query::TestQuery)
    actual = search_function(query.query)

    precision, hits, retrieved = calculate_precision(actual, query.expected_docs)
    recall, _, wanted = calculate_recall(actual, query.expected_docs)
    f1 = calculate_f1(precision, recall)

    return QueryResult(
        query.query,
        precision,
        recall,
        f1,
        query.expected_docs,
        actual,
        hits,
        retrieved,
        wanted,
    )
end
|
|
||
# Evaluates multiple search queries and aggregates the results.
# Each query is scored with `evaluate_query`; the returned EvaluationResults
# holds the per-query results, the unweighted mean of each metric, and the
# summed raw counts across all queries.
function evaluate_all(search_function, queries)
    results = [evaluate_query(search_function, q) for q in queries]

    # Guard the empty case: `mean` of an empty collection is undefined
    # (error/NaN) and `sum` over an empty generator throws, so report zeros.
    if isempty(results)
        return EvaluationResults(QueryResult[], 0.0, 0.0, 0.0, 0, 0, 0)
    end

    avg_precision = mean([r.precision for r in results])
    avg_recall = mean([r.recall for r in results])
    avg_f1 = mean([r.f1 for r in results])

    # Calculate total raw values across all queries
    total_relevant_found = sum(r.relevant_count for r in results)
    total_documents_retrieved = sum(r.total_retrieved for r in results)
    total_relevant_documents = sum(r.total_relevant for r in results)

    return EvaluationResults(
        results,
        avg_precision,
        avg_recall,
        avg_f1,
        total_relevant_found,
        total_documents_retrieved,
        total_relevant_documents
    )
end
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| { | ||
| "name": "documenter-search-benchmark", | ||
| "version": "1.0.0", | ||
| "description": "Search benchmarking for Documenter.jl", | ||
| "dependencies": { | ||
| "minisearch": "6.1.0" | ||
Rahban1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| using JSON | ||
|
|
||
# Loads the prebuilt Documenter search index from docs/build/search_index.js
# (relative to this file) and returns only its "docs" array.
# Errors when the file is missing or when it does not contain the expected
# `var documenterSearchIndex = ` variable declaration.
function load_real_search_index()
    # Use the example search index that's already built and tested
    search_index_path = joinpath(@__DIR__, "../../docs/build/search_index.js")
    isfile(search_index_path) || error("Search index not found at: $search_index_path")

    # The index is a JS file assigning a JSON literal to a variable;
    # strip the declaration prefix and parse the remainder as JSON.
    content = read(search_index_path, String)

    marker = findfirst("var documenterSearchIndex = ", content)
    marker === nothing && error("Invalid search index format: missing variable declaration")

    json_content = content[(last(marker) + 1):end]
    return JSON.parse(json_content)["docs"]
end
|
|
||
# Runs `query` against the real search index by executing the Node.js
# wrapper script (wrapper.js) with the index data and the query injected,
# and returns the wrapper's parsed JSON output.
function real_search(query::String)
    # Load the real search index automatically
    search_index_data = load_real_search_index()

    # Read the JS wrapper and inject data. Encode the query with JSON.json
    # (not naive quoting) so embedded quotes/backslashes are escaped instead
    # of producing invalid — or injectable — JavaScript.
    wrapper_js = read(joinpath(@__DIR__, "wrapper.js"), String)
    wrapper_js = replace(wrapper_js, "__SEARCH_INDEX__" => JSON.json(search_index_data))
    wrapper_js = replace(wrapper_js, "__QUERY__" => JSON.json(query))

    # Write the wrapper to a temporary file and run it; mktemp's do-block
    # form cleans the file up afterwards.
    return mktemp(@__DIR__) do path, io
        write(io, wrapper_js)
        close(io)
        cd(@__DIR__) do
            result = read(`node $path`, String)
            return JSON.parse(strip(result))
        end
    end
end
Uh oh!
There was an error while loading. Please reload this page.