Skip to content

Commit 885d494

Browse files
committed
Added ngram search for datastore keys
1 parent c273088 commit 885d494

File tree

3 files changed

+282
-1
lines changed

3 files changed

+282
-1
lines changed

correlations.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
package shuffle
22

33
import (
4+
"net/http"
45
"fmt"
6+
"io/ioutil"
7+
"encoding/json"
8+
"log"
9+
"strings"
510
)
611

712
func GetCorrelations(resp http.ResponseWriter, request *http.Request) {
@@ -10,6 +15,14 @@ func GetCorrelations(resp http.ResponseWriter, request *http.Request) {
1015
return
1116
}
1217

18+
user, err := HandleApiAuthentication(resp, request)
19+
if err != nil {
20+
log.Printf("[AUDIT] Authentication failed in GetCorrelations: %s", err)
21+
resp.WriteHeader(401)
22+
resp.Write([]byte(`{"success": false, "reason": "Authentication failed"}`))
23+
return
24+
}
25+
1326
body, err := ioutil.ReadAll(request.Body)
1427
if err != nil {
1528
log.Printf("[WARNING] Failed to read body in GetCorrelations: %s", err)
@@ -18,4 +31,68 @@ func GetCorrelations(resp http.ResponseWriter, request *http.Request) {
1831
return
1932
}
2033

34+
log.Printf("[DEBUG] GetCorrelations request body: %s", string(body))
35+
36+
correlationData := CorrelationRequest{}
37+
err = json.Unmarshal(body, &correlationData)
38+
if err != nil {
39+
log.Printf("[WARNING] Failed to parse JSON in GetCorrelations: %s", err)
40+
resp.WriteHeader(400)
41+
resp.Write([]byte(`{"success": false, "reason": "Invalid JSON format"}`))
42+
return
43+
}
44+
45+
log.Printf("Correlation request data: %#v", correlationData)
46+
47+
// Process correlationData as needed
48+
if correlationData.OrgId != user.ActiveOrg.Id {
49+
log.Printf("[AUDIT] User %s attempted to access correlations for org %d", user.Username, correlationData.OrgId)
50+
resp.WriteHeader(403)
51+
resp.Write([]byte(`{"success": false, "reason": "Forbidden"}`))
52+
return
53+
}
54+
55+
searchKey := fmt.Sprintf("%s|%s", correlationData.Category, correlationData.Key)
56+
57+
availableTypes := []string{"datastore"}
58+
if correlationData.Type == "datastore" {
59+
// Nothing to do as we have the right key already
60+
} else {
61+
log.Printf("[WARNING] Invalid type in GetCorrelations: %#v. Available types: %#v", correlationData.Type, strings.Join(availableTypes, ", "))
62+
resp.WriteHeader(400)
63+
resp.Write([]byte(`{"success": false, "reason": "Invalid type"}`))
64+
return
65+
}
66+
67+
ctx := GetContext(request)
68+
correlations, err := GetDatastoreNgramItems(ctx, correlationData.OrgId, searchKey, 50)
69+
if err != nil {
70+
log.Printf("[ERROR] Failed to get correlations from DB in GetCorrelations: %s", err)
71+
resp.WriteHeader(500)
72+
resp.Write([]byte(`{"success": false, "reason": "Internal server error"}`))
73+
return
74+
}
75+
76+
newCorrelations := []NGramItem{}
77+
for _, item := range correlations {
78+
if item.OrgId != correlationData.OrgId {
79+
continue
80+
}
81+
82+
item.OrgId = ""
83+
newCorrelations = append(newCorrelations, item)
84+
}
85+
86+
correlations = newCorrelations
87+
marshalledCorrelations, err := json.Marshal(correlations)
88+
if err != nil {
89+
log.Printf("[ERROR] Failed to marshal correlations in GetCorrelations: %s", err)
90+
resp.WriteHeader(500)
91+
resp.Write([]byte(`{"success": false, "reason": "Internal server error: Failed to marshal correlations"}`))
92+
return
93+
}
94+
95+
resp.WriteHeader(200)
96+
resp.Write([]byte(marshalledCorrelations))
97+
2198
}

db-connector.go

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16735,7 +16735,179 @@ func SetDatastoreNGramItem(ctx context.Context, key string, ngramItem *NGramItem
1673516735
return nil
1673616736
}
1673716737

16738+
func GetDatastoreNgramItems(ctx context.Context, orgId, searchKey string, maxAmount int) ([]NGramItem, error) {
16739+
var items []NGramItem
16740+
var err error
16741+
16742+
nameKey := "datastore_ngram"
16743+
if project.DbType == "opensearch" {
16744+
var buf bytes.Buffer
16745+
16746+
query := map[string]interface{}{
16747+
"size": maxAmount,
16748+
"query": map[string]interface{}{
16749+
"bool": map[string]interface{}{
16750+
"must": []map[string]interface{}{
16751+
map[string]interface{}{
16752+
"match": map[string]interface{}{
16753+
"org_id": orgId,
16754+
},
16755+
},
16756+
map[string]interface{}{
16757+
"match": map[string]interface{}{
16758+
"ref": searchKey,
16759+
},
16760+
},
16761+
},
16762+
},
16763+
},
16764+
}
16765+
16766+
if err := json.NewEncoder(&buf).Encode(query); err != nil {
16767+
log.Printf("[WARNING] Error encoding find user query: %s", err)
16768+
return items, err
16769+
}
16770+
16771+
resp, err := project.Es.Search(ctx, &opensearchapi.SearchReq{
16772+
Indices: []string{strings.ToLower(GetESIndexPrefix(nameKey))},
16773+
Body: &buf,
16774+
Params: opensearchapi.SearchParams{
16775+
TrackTotalHits: true,
16776+
},
16777+
})
16778+
if err != nil {
16779+
if strings.Contains(err.Error(), "index_not_found_exception") {
16780+
return items, nil
16781+
}
16782+
16783+
log.Printf("[ERROR] Error getting response from Opensearch (get ngram items): %s", err)
16784+
return items, err
16785+
}
16786+
16787+
res := resp.Inspect().Response
16788+
defer res.Body.Close()
16789+
if res.StatusCode == 404 {
16790+
return items, nil
16791+
}
16792+
16793+
if res.IsError() {
16794+
var e map[string]interface{}
16795+
if err := json.NewDecoder(res.Body).Decode(&e); err != nil {
16796+
log.Printf("[WARNING] Error parsing the response body: %s", err)
16797+
return items, err
16798+
} else {
16799+
// Print the response status and error information.
16800+
log.Printf("[%s] %s: %s",
16801+
res.Status(),
16802+
e["error"].(map[string]interface{})["type"],
16803+
e["error"].(map[string]interface{})["reason"],
16804+
)
16805+
}
16806+
}
16807+
16808+
if res.StatusCode != 200 && res.StatusCode != 201 {
16809+
return items, errors.New(fmt.Sprintf("Bad statuscode: %d", res.StatusCode))
16810+
}
16811+
16812+
respBody, err := ioutil.ReadAll(res.Body)
16813+
if err != nil {
16814+
return items, err
16815+
}
16816+
16817+
wrapped := NGramSearchWrapper{}
16818+
err = json.Unmarshal(respBody, &wrapped)
16819+
if err != nil {
16820+
return items, err
16821+
}
16822+
16823+
//log.Printf("Found items: %d", len(wrapped.Hits.Hits))
16824+
for _, hit := range wrapped.Hits.Hits {
16825+
if hit.Source.Key == "" {
16826+
continue
16827+
}
16828+
16829+
if hit.Source.OrgId == orgId {
16830+
items = append(items, hit.Source)
16831+
}
16832+
}
16833+
16834+
} else {
16835+
16836+
if len(orgId) == 0 {
16837+
return items, errors.New("No org to find ngrams for found")
16838+
}
16839+
16840+
cursorStr := ""
16841+
query := datastore.NewQuery(nameKey).Filter("OrgId =", orgId).Filter("Ref =", searchKey).Limit(maxAmount)
16842+
for {
16843+
it := project.Dbclient.Run(ctx, query)
16844+
if len(items) >= maxAmount {
16845+
break
16846+
}
16847+
16848+
for {
16849+
innerItem := NGramItem{}
16850+
_, err = it.Next(&innerItem)
16851+
if err != nil {
16852+
if strings.Contains(fmt.Sprintf("%s", err), "cannot load field") {
16853+
16854+
} else {
16855+
if !strings.Contains(fmt.Sprintf("%s", err), "no more items in iterator") {
16856+
log.Printf("[WARNING] NGram iterator issue: %s", err)
16857+
}
16858+
16859+
break
16860+
}
16861+
}
16862+
16863+
found := false
16864+
for _, loopedItem := range items {
16865+
if loopedItem.Key == innerItem.Key {
16866+
found = true
16867+
break
16868+
}
16869+
}
16870+
16871+
if !found {
16872+
items = append(items, innerItem)
16873+
}
16874+
16875+
if len(items) >= maxAmount {
16876+
break
16877+
}
16878+
}
16879+
16880+
if err != iterator.Done {
16881+
log.Printf("[INFO] Failed fetching ngrams: %v", err)
16882+
break
16883+
}
16884+
16885+
// Get the cursor for the next page of results.
16886+
nextCursor, err := it.Cursor()
16887+
if err != nil {
16888+
log.Printf("[ERROR] Problem with cursor (ngram): %s", err)
16889+
break
16890+
} else {
16891+
nextStr := fmt.Sprintf("%s", nextCursor)
16892+
if cursorStr == nextStr {
16893+
break
16894+
}
16895+
16896+
cursorStr = nextStr
16897+
query = query.Start(nextCursor)
16898+
}
16899+
}
16900+
}
16901+
16902+
if len(items) > maxAmount {
16903+
items = items[:maxAmount]
16904+
}
16905+
16906+
return items, nil
16907+
}
16908+
1673816909
// Key itself contains the orgId so this should "just work"
16910+
// To get ALL items matching a key, use GetDatastoreNgramItems()
1673916911
func GetDatastoreNGramItem(ctx context.Context, key string) (*NGramItem, error) {
1674016912

1674116913
nameKey := "datastore_ngram"

structs.go

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ type AppContext struct {
1515
Example string `json:"example,omitempty" datastore:"example,noindex"`
1616
}
1717

18+
type CorrelationRequest struct {
19+
Type string `json:"type"`
20+
Key string `json:"key"`
21+
Category string `json:"category"`
22+
OrgId string `json:"org_id"`
23+
}
24+
1825
type LogRequest struct {
1926
Timestamp int64 `json:"timestamp"`
2027

@@ -2432,6 +2439,31 @@ type FileSearchWrapper struct {
24322439
} `json:"hits"`
24332440
}
24342441

2442+
type NGramSearchWrapper struct {
2443+
Took int `json:"took"`
2444+
TimedOut bool `json:"timed_out"`
2445+
Shards struct {
2446+
Total int `json:"total"`
2447+
Successful int `json:"successful"`
2448+
Skipped int `json:"skipped"`
2449+
Failed int `json:"failed"`
2450+
} `json:"_shards"`
2451+
Hits struct {
2452+
Total struct {
2453+
Value int `json:"value"`
2454+
Relation string `json:"relation"`
2455+
} `json:"total"`
2456+
MaxScore float64 `json:"max_score"`
2457+
Hits []struct {
2458+
Index string `json:"_index"`
2459+
Type string `json:"_type"`
2460+
ID string `json:"_id"`
2461+
Score float64 `json:"_score"`
2462+
Source NGramItem `json:"_source"`
2463+
} `json:"hits"`
2464+
} `json:"hits"`
2465+
}
2466+
24352467
type WorkflowSearchWrapper struct {
24362468
Took int `json:"took"`
24372469
TimedOut bool `json:"timed_out"`
@@ -4651,7 +4683,7 @@ type MinimalWorkflow struct {
46514683

46524684
type NGramItem struct {
46534685
Key string `json:"key"`
4654-
OrgId string `json:"org_id"`
4686+
OrgId string `json:"org_id,omitempty"`
46554687

46564688
Amount int `json:"amount"`
46574689
Ref []string `json:"ref"` // Reference to other items

0 commit comments

Comments
 (0)