-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathKeywordExtractor.js
More file actions
82 lines (76 loc) · 25.9 KB
/
KeywordExtractor.js
File metadata and controls
82 lines (76 loc) · 25.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
https://community.verizon.com/t5/forums/searchpage/tab/message?filter=acceptedSolutions,solvedThreads,location,dateRangeType&solution=true&advanced=true&rangeTime=0&solved=true&location=category:apple&sort_by=-topicPostDate&collapse_discussion=true&search_type=thread&page=1,
https://community.verizon.com/t5/forums/searchpage/tab/message?filter=acceptedSolutions,solvedThreads,location,dateRangeType&solution=true&advanced=true&rangeTime=0&solved=true&location=category:apple&sort_by=-topicPostDate&collapse_discussion=true&search_type=thread&page=2,
https://community.verizon.com/t5/forums/searchpage/tab/message?filter=acceptedSolutions,solvedThreads,location,dateRangeType&solution=true&advanced=true&rangeTime=0&solved=true&location=category:apple&sort_by=-topicPostDate&collapse_discussion=true&search_type=thread&page=3",
https://community.verizon.com/t5/forums/searchpage/tab/message?filter=acceptedSolutions,solvedThreads,location,dateRangeType&solution=true&advanced=true&rangeTime=0&solved=true&location=category:apple&sort_by=-topicPostDate&collapse_discussion=true&search_type=thread&page=4,
https://community.verizon.com/t5/forums/searchpage/tab/message?filter=acceptedSolutions,solvedThreads,location,dateRangeType&solution=true&advanced=true&rangeTime=0&solved=true&location=category:apple&sort_by=-topicPostDate&collapse_discussion=true&search_type=thread&page=5
*/
var extract = function(doc){
var String = java.lang.String;
var e = java.lang.Exception;
/** @note
* Dictionary origin:
* https://github.com/dariusk/corpora/blob/master/data/words/nouns.json
* - converted from array to hash for better performance, however using the array would
* allow you to do more stem matching. It would, however, be a significant perforance hit.
*/
var dictiopnary = {"civility":"civility","hands":"hands","bicycle":"bicycle","mayhem":"mayhem","kangaroo":"kangaroo","accuracy":"accuracy","redistribution":"redistribution","stylus":"stylus","building":"building","carcass":"carcass","probation":"probation","enclosure":"enclosure","Lowry":"Lowry","eyewitness":"eyewitness","deference":"deference","trauma":"trauma","persona":"persona","appendix":"appendix","dealing":"dealing","mayor":"mayor","dedication":"dedication","investment":"investment","google":"google","shaving":"shaving","baker":"baker","lighting":"lighting","docking":"docking","advancement":"advancement","correlation":"correlation","dishonesty":"dishonesty","nationality":"nationality","poster":"poster","vista":"vista","cross-examination":"cross-examination","misery":"misery","Frenchman":"Frenchman","resignation":"resignation","matrix":"matrix","measurement":"measurement","miner":"miner","dollar":"dollar","opposition":"opposition","parsley":"parsley","juggernaut":"juggernaut","equator":"equator","boldness":"boldness","musician":"musician","staircase":"staircase","glucose":"glucose","pathos":"pathos","fertilization":"fertilization","mechanics":"mechanics","mathematician":"mathematician","distributor":"distributor","forefront":"forefront","invasion":"invasion","outdoors":"outdoors","robber":"robber","adage":"adage","illustration":"illustration","prominence":"prominence","chili":"chili","deliverance":"deliverance","republic":"republic","reader":"reader","annoyance":"annoyance","sufferer":"sufferer","uprising":"uprising","completeness":"completeness","provocation":"provocation","transmission":"transmission","helper":"helper","rendition":"rendition","bondage":"bondage","negligence":"negligence","intersection":"intersection","scholarship":"scholarship","magnificence":"magnificence","conversation":"conversation","monarchy":"monarchy","tracing":"tracing","defection":"defection","unhappiness":"unhappiness","foyer":"foyer","drumming":"drumming","sloth":"sloth","fusion":"fusion","curator":"curator","brunch":"brunch","sinking":"sinking","success":"success","authority":"authority","adjustment":"adjustment","panther":"panther","nourishment":"nourishment","prorogation":"prorogation","withholding":"withholding","semiconductor":"semiconductor","eloquence":"eloquence","calamity":"calamity","ascent":"ascent","grandeur":"grandeur","happiness":"happiness","provider":"provider","normalcy":"normalcy","pasta":"pasta","Mayer":"Mayer","morale":"morale","projection":"projection","reinforcement":"reinforcement","liner":"liner","delirium":"delirium","preoccupation":"preoccupation","effectiveness":"effectiveness","lodging":"lodging","ticker":"ticker","victory":"victory","attacker":"attacker","scissors":"scissors","environment":"environment","china":"china","redemption":"redemption","mover":"mover","translation":"translation","syntax":"syntax","handling":"handling","illustrator":"illustrator","tights":"tights","college":"college","self-respect":"self-respect","breadth":"breadth","movie":"movie","offense":"offense","allegation":"allegation","tendency":"tendency","artistry":"artistry","cilantro":"cilantro","cumin":"cumin","cutter":"cutter","nervousness":"nervousness","variation":"variation","wiring":"wiring","exaggeration":"exaggeration","flashing":"flashing","phrasing":"phrasing","apostle":"apostle","sweepstakes":"sweepstakes","spoiler":"spoiler","appellation":"appellation","proceedings":"proceedings","darkness":"darkness","delicacy":"delicacy","reformer":"reformer","viewpoint":"viewpoint","storyteller":"storyteller","litre":"litre","allegiance":"allegiance","recurrence":"recurrence","mutation":"mutation","leopard":"leopard","system":"system","driver":"driver","trilogy":"trilogy","coyote":"coyote","vaccination":"vaccination","iniquity":"iniquity","enlightenment":"enlightenment","sophistication":"sophistication","dumps":"dumps","quilting":"quilting","functionality":"functionality","conduit":"conduit","manufacturer":"manufacturer","honesty":"honesty","syndrome":"syndrome","accreditation":"accreditation","redundancy":"redundancy","intermission":"intermission","outcome":"outcome","adultery":"adultery","bodyguard":"bodyguard","inaccuracy":"inaccuracy","Pullman":"Pullman","synopsis":"synopsis","aesthetics":"aesthetics","climber":"climber","storey":"storey","allegory":"allegory","meantime":"meantime","prototype":"prototype","inauguration":"inauguration","framework":"framework","hacker":"hacker","altercation":"altercation","babbling":"babbling","firing":"firing","dawning":"dawning","inevitability":"inevitability","logic":"logic","mirth":"mirth","manifesto":"manifesto","depletion":"depletion","epilepsy":"epilepsy","charade":"charade","mantra":"mantra","arrears":"arrears","breaker":"breaker","traveler":"traveler","arrow":"arrow","flora":"flora","condominium":"condominium","tempo":"tempo","by-election":"by-election","degeneration":"degeneration","exploitation":"exploitation","burglary":"burglary","function":"function","fastball":"fastball","lifeblood":"lifeblood","puberty":"puberty","jenny":"jenny","line-up":"line-up","verification":"verification","actress":"actress","mahogany":"mahogany","terror":"terror","Pueblo":"Pueblo","finale":"finale","acreage":"acreage","basin":"basin","basis":"basis","sediment":"sediment","commenter":"commenter","senator":"senator","girlfriend":"girlfriend","confiscation":"confiscation","violation":"violation","archipelago":"archipelago","bingo":"bingo","joseph":"joseph","environs":"environs","outing":"outing","university":"university","ferocity":"ferocity","prisoner":"prisoner","progression":"progression","dioxide":"dioxide","scenery":"scenery","lifeline":"lifeline","righteousness":"righteousness","larceny":"larceny","laurels":"laurels","pragmatism":"pragmatism","touchdown":"touchdown","rationality":"rationality","cholera":"cholera","semicolon":"semicolon","poultry":"poultry","resurgence":"resurgence","blossom":"blossom","follower":"follower","twenties":"twenties","aircraft":"aircraft","employment":"employment","pounding":"pounding","dexterity":"dexterity","outfield":"outfield","bonding":"bonding","martin":"martin","parson":"parson","maintenance":"maintenance","policeman":"policeman","caught":"caught","blather":"blather","accessibility":"accessibility","apparatus":"apparatus","advertising":"advertising","catfish":"catfish","protein":"protein","civilisation":"civilisation","repertoire":"repertoire","sweater":"sweater","shopping":"shopping","anthropology":"anthropology","tuning":"tuning","absorption":"absorption","forgiveness":"forgiveness","relaxation":"relaxation","Snead":"Snead","runoff":"runoff","allergy":"allergy","ballet":"ballet","swimmer":"swimmer","vulnerability":"vulnerability","processor":"processor","Barrymore":"Barrymore","patchwork":"patchwork","repentance":"repentance","cartridge":"cartridge","reassurance":"reassurance","duchess":"duchess","demonstration":"demonstration","coaster":"coaster","flask":"flask","archery":"archery","complexity":"complexity","workman":"workman","mobility":"mobility","anomaly":"anomaly","confidant":"confidant","housing":"housing","rapport":"rapport","battery":"battery","colors":"colors","ligament":"ligament","diversity":"diversity","envoy":"envoy","chasm":"chasm","scenario":"scenario","tractor":"tractor","advertisement":"advertisement","helplessness":"helplessness","stillness":"stillness","taker":"taker","cornerstone":"cornerstone","ankle":"ankle","cemetery":"cemetery","retention":"retention","legislation":"legislation","siding":"siding","cropping":"cropping","horror":"horror","disarmament":"disarmament","ratification":"ratification","intolerance":"intolerance","deflation":"deflation","liberation":"liberation","watchdog":"watchdog","perusal":"perusal","sarcasm":"sarcasm","gunman":"gunman","imperialism":"imperialism","toxicity":"toxicity","organisation":"organisation","accounting":"accounting","slugger":"slugger","irrigation":"irrigation","pendulum":"pendulum","commemoration":"commemoration","homer":"homer","appeasement":"appeasement","juror":"juror","molasses":"molasses","bookmark":"bookmark","crossroads":"crossroads","proximity":"proximity","solemnity":"solemnity","telephony":"telephony","consul":"consul","patriotism":"patriotism","clothing":"clothing","playing":"playing","neighborhood":"neighborhood","thunderstorm":"thunderstorm","martyrdom":"martyrdom","accomplice":"accomplice","Easter":"Easter","crocodile":"crocodile","memory":"memory","absence":"absence","commune":"commune","physique":"physique","councilman":"councilman","simplicity":"simplicity","thicket":"thicket","agriculture":"agriculture","grocer":"grocer","statement":"statement","Tuesday":"Tuesday","witchcraft":"witchcraft","outage":"outage","inaction":"inaction","intruder":"intruder","Chihuahua":"Chihuahua","seating":"seating","momentum":"momentum","savior":"savior","cracker":"cracker","brightness":"brightness","medics":"medics","disobedience":"disobedience","earnestness":"earnestness","ardor":"ardor","stairway":"stairway","feces":"feces","cabbage":"cabbage","innocence":"innocence","dominion":"dominion","subsidy":"subsidy","ammonia":"ammonia","consistency":"consistency","digger":"digger","billing":"billing","conflagration":"conflagration","Syrah":"Syrah","circus":"circus","fetish":"fetish","gallery":"gallery","transmitter":"transmitter","vomiting":"vomiting","metre":"metre","seeder":"seeder","warmth":"warmth","infamy":"infamy","dossier":"dossier","ambulance":"ambulance","unification":"unification","grouping":"grouping","mythology":"mythology","brunt":"brunt","passenger":"passenger","bounds":"bounds","standpoint":"standpoint","stanza":"stanza","hoarding":"hoarding","widget":"widget","bomber":"bomber","endangerment":"endangerment","self-control":"self-control","disservice":"disservice","formation":"formation","mailing":"mailing","tourist":"tourist","hardship":"hardship","cheesecake":"cheesecake","plateau":"plateau","comrade":"comrade","fries":"fries","demon":"demon","scooter":"scooter","architecture":"architecture","hammock":"hammock","semifinal":"semifinal","souvenir":"souvenir","wealth":"wealth","editor":"editor","means":"means","electricity":"electricity","tablespoon":"tablespoon","earnings":"earnings","absurdity":"absurdity","drafting":"drafting","organizer":"organizer","width":"width","dominance":"dominance","heads":"heads","indicator":"indicator","oasis":"oasis","disability":"disability","appointment":"appointment","aviation":"aviation","polling":"polling","retirement":"retirement","cyclone":"cyclone","precedence":"precedence","obscurity":"obscurity","attractiveness":"attractiveness","notification":"notification","commander":"commander","speculation":"speculation","womanhood":"womanhood","licence":"licence","indecency":"indecency","shoplifting":"shoplifting","fixing":"fixing","supposition":"supposition","allotment":"allotment","glamour":"glamour","nutrition":"nutrition","instructor":"instructor","novella":"novella","bones":"bones","eater":"eater","skate":"skate","solicitation":"solicitation","imprisonment":"imprisonment","inspiration":"inspiration","drunkenness":"drunkenness","footing":"footing","expediency":"expediency","comedian":"comedian","rocks":"rocks","devolution":"devolution","specimen":"specimen","piles":"piles","hegemony":"hegemony","cynicism":"cynicism","soccer":"soccer","artisan":"artisan","philosopher":"philosopher","actuality":"actuality","adherence":"adherence","assignment":"assignment","nylon":"nylon","therapy":"therapy","stimulus":"stimulus","posting":"posting","colleague":"colleague","kingdom":"kingdom","terrier":"terrier","airtime":"airtime","sorcery":"sorcery","posterity":"posterity","depression":"depression","fulfillment":"fulfillment","vanguard":"vanguard","headcount":"headcount","gallantry":"gallantry","banjo":"banjo","horseman":"horseman","parliament":"parliament","hamburger":"hamburger","aversion":"aversion","scolding":"scolding","custard":"custard","announcement":"announcement","servitude":"servitude","Rodeo":"Rodeo","scouring":"scouring","hierarchy":"hierarchy","locality":"locality","opportunity":"opportunity","tourney":"tourney","proceeding":"proceeding","enthusiasm":"enthusiasm","propensity":"propensity","onslaught":"onslaught","interpretation":"interpretation","saloon":"saloon","applause":"applause","jones":"jones","fundraiser":"fundraiser","spaghetti":"spaghetti","homeland":"homeland","spectre":"spectre","scarcity":"scarcity","comer":"comer","thriller":"thriller","annuity":"annuity","viability":"viability","recourse":"recourse","refinery":"refinery","coding":"coding","adoption":"adoption","extinction":"extinction","involvement":"involvement","peter":"peter","insurer":"insurer","groundwork":"groundwork","assembly":"assembly","embodiment":"embodiment","schooner":"schooner","volcano":"volcano","ignition":"ignition","radiance":"radiance","innovation":"innovation","retailer":"retailer","waitress":"waitress","apartheid":"apartheid","captaincy":"captaincy","words":"words","biography":"biography","self-confidence":"self-confidence","embroidery":"embroidery","chivalry":"chivalry","auspices":"auspices","vicinity":"vicinity","validity":"validity","penguin":"penguin","treadmill":"treadmill","celebrity":"celebrity","battling":"battling","dialect":"dialect","putting":"putting","priesthood":"priesthood","stamina":"stamina","listener":"listener","corpus":"corpus","schism":"schism","bowling":"bowling","denomination":"denomination","plurality":"plurality","boardroom":"boardroom","detention":"detention","beginner":"beginner","lookout":"lookout","snark":"snark","underwear":"underwear","commissioner":"commissioner","laborer":"laborer","brewer":"brewer","brainstorming":"brainstorming","baron":"baron","emancipation":"emancipation","seduction":"seduction","ralph":"ralph","consultancy":"consultancy","espionage":"espionage","rapidity":"rapidity","quorum":"quorum","prospectus":"prospectus","anxiety":"anxiety","acceptance":"acceptance","immunization":"immunization","scrum":"scrum","boasting":"boasting","terry":"terry","mediator":"mediator","refrigerator":"refrigerator","cursor":"cursor","grappling":"grappling","relativism":"relativism","doorstep":"doorstep","prophecy":"prophecy","dugout":"dugout","owner":"owner","specification":"specification","advert":"advert","permission":"permission","tyrant":"tyrant","pottery":"pottery","blackberry":"blackberry","encouragement":"encouragement","mowing":"mowing","setup":"setup","cylinder":"cylinder","developer":"developer","bedtime":"bedtime","rubbing":"rubbing","atheism":"atheism","helping":"helping","simulation":"simulation","encampment":"encampment","chemotherapy":"chemotherapy","collaborator":"collaborator","majority":"majority","dispatcher":"dispatcher","hallway":"hallway","pundit":"pundit","doorway":"doorway","youngster":"youngster","certification":"certification","plethora":"plethora","culprit":"culprit","birthplace":"birthplace","contents":"contents","cheerleader":"cheerleader","interval":"interval","hello":"hello","snapshot":"snapshot","affinity":"affinity","machinery":"machinery","nobility":"nobility","template":"template","anatomy":"anatomy","weariness":"weariness","eagerness":"eagerness","broth":"broth","neighbour":"neighbour","noodle":"noodle","subscription":"subscription","diver":"diver","abbey":"abbey","literature":"literature","boomer":"boomer","reinstatement":"reinstatement","clearance":"clearance","sending":"sending","countryman":"countryman","illness":"illness","juncture":"juncture","creator":"creator","mentality":"mentality","restaurant":"restaurant","vogue":"vogue","clearing":"clearing","to-day":"to-day","precinct":"precinct","whereabouts":"whereabouts","discovery":"discovery","instruction":"instruction","seriousness":"seriousness","residency":"residency","scrimmage":"scrimmage","freestyle":"freestyle","pessimism":"pessimism","domination":"domination","backlash":"backlash","bonus":"bonus","mouthpiece":"mouthpiece","recognition":"recognition","networking":"networking","cassette":"cassette","contentment":"contentment","heath":"heath","plaza":"plaza","brotherhood":"brotherhood","falls":"falls","orchestra":"orchestra","manga":"manga","pretense":"pretense","oxygen":"oxygen","phosphorus":"phosphorus","lordship":"lordship","foothold":"foothold","daybreak":"daybreak","poker":"poker","diagnosis":"diagnosis","digress":"digress","formality":"formality","dentist":"dentist","headquarters":"headquarters","subcommittee":"subcommittee","deduction":"deduction","dismissal":"dismissal","header":"header","designation":"designation","nexus":"nexus","differentiation":"differentiation","potassium":"potassium","abundance":"abundance","recipe":"recipe","schoolboy":"schoolboy","hostility":"hostility","regularity":"regularity","misunderstanding":"misunderstanding","leadership":"leadership","vampire":"vampire","paradox":"paradox","Sister":"Sister","cyclist":"cyclist","vocalist":"vocalist","downside":"downside","influx":"influx","headset":"headset","equation":"equation","constable":"constable","Christianity":"Christianity","mango":"mango","clerk":"clerk","attraction":"attraction","narrator":"narrator","receptor":"receptor","accommodation":"accommodation","maple":"maple","roadblock":"roadblock","suburb":"suburb","Catholicism":"Catholicism","villa":"villa","lineage":"lineage","windfall":"windfall","originality":"originality","adaptation":"adaptation","dwelling":"dwelling","intercourse":"intercourse","apologise":"apologise","somewhere":"somewhere","Saturday":"Saturday","observer":"observer","dynamics":"dynamics","rebirth":"rebirth","diploma":"diploma","metaphysics":"metaphysics","obscenity":"obscenity","suspension":"suspension","height":"height","browsing":"browsing","pointer":"pointer","insanity":"insanity","underdog":"underdog","recreation":"recreation","asthma":"asthma","hurricane":"hurricane","tiger":"tiger","rancher":"rancher","processing":"processing","allies":"allies","mainframe":"mainframe","concur":"concur","waistcoat":"waistcoat","crossover":"crossover","serenity":"serenity","seizure":"seizure","postponement":"postponement","outset":"outset","jogging":"jogging","association":"association","fragmentation":"fragmentation","bases":"bases","counselor":"counselor","ma'am":"ma'am","accountability":"accountability","booty":"booty","liquidation":"liquidation","reaction":"reaction","drilling":"drilling","reviewer":"reviewer","gridlock":"gridlock","bureaucracy":"bureaucracy","mortality":"mortality","stagnation":"stagnation","operation":"operation","acquiescence":"acquiescence","rector":"rector","salvation":"salvation","trout":"trout","reputation":"reputation","exclamation":"exclamation","frequency":"frequency","genesis":"genesis","specs":"specs","whisky":"whisky","tavern":"tavern","longevity":"longevity","futility":"futility","wardrobe":"wardrobe","loathing":"loathing","demeanor":"demeanor","oracle":"oracle","murderer":"murderer","countdown":"countdown","Orientalism":"Orientalism","stole":"stole","anemia":"anemia","reins":"reins","coconut":"coconut","criminality":"criminality","bending":"bending","intercession":"intercession","discord":"discord","squad":"squad","Woodward":"Woodward","campaign":"campaign","regulator":"regulator","conflict":"conflict","saucer":"saucer","overseer":"overseer","snack":"snack","ranger":"ranger","broccoli":"broccoli","donkey":"donkey","barrister":"barrister","privacy":"privacy","jurisprudence":"jurisprudence","monument":"monument","curfew":"curfew","misconception":"misconception","stocks":"stocks","resentment":"resentment","examination":"examination","definition":"definition","staging":"staging","coverage":"coverage","evacuation":"evacuation","allocation":"allocation","damsel":"damsel","madness":"madness","certainty":"certainty","technology":"technology","theology":"theology","buyout":"buyout","voltage":"voltage","layout":"layout","Armour":"Armour","deficiency":"deficiency","litigation":"litigation","riches":"riches","pairing":"pairing","formula":"formula","outpost":"outpost","closeness":"closeness","humility":"humility","competition":"competition","repository":"repository","thoroughfare":"thoroughfare","consist":"consist","feedback":"feedback","folklore":"folklore","melodrama":"melodrama","sharpness":"sharpness","nobody":"nobody","mishap":"mishap","repayment":"repayment","spectacle":"spectacle","biology":"biology","method":"method","drank":"drank","infiltration":"infiltration","forefinger":"forefinger","nursery":"nursery","providence":"providence","emperor":"emperor","motto":"motto","contractor":"contractor","alligator":"alligator","immunity":"immunity","distribution":"distribution","cabal":"cabal","Cabot":"Cabot","canonization":"canonization","frigate":"frigate","sewing":"sewing","solitude":"solitude","hockey":"hockey","gusto":"gusto","nucleus":"nucleus","sperm":"sperm","aggression":"aggression","consolidation":"consolidation","genitals":"genitals","whiteness":"whiteness","elements":"elements","vindication":"vindication","avarice":"avarice","snail":"snail","begun":"begun","threshold":"threshold","check-in":"check-in","leasing":"leasing","barrier":"barrier","conspiracy":"conspiracy","knocking":"knocking","theater":"theater","tycoon":"tycoon","publicity":"publicity","fascism":"fascism","tossing":"tossing","likeness":"likeness","nuisance":"nuisance","nightmare":"nightmare","lustre":"lustre","estimation":"estimation","collision":"collision","ducking":"ducking","tendon":"tendon","combustion":"combustion","fraud":"fraud","mouthful":"mouthful","publisher":"publisher","glitter":"glitter","sesame":"sesame","departure":"departure","squid":"squid","publicist":"publicist","handshake":"handshake","covering":"covering","configuration":"configuration","advice":"advice","balls":"balls","friendliness":"friendliness","interact":"interact","lineup":"lineup","vegetation":"vegetation","error":"error","despotism":"despotism","quart":"quart","computing":"computing","stoppage":"stoppage","pneumonia":"pneumonia","hearth":"hearth","abstinence":"abstinence","hearts":"hearts","iceberg":"iceberg","appraisal":"appraisal","fluke":"fluke","iteration":"iteration","ultimatum":"ultimatum","orientation":"orientation","racism":"racism","quantity":"quantity","habitation":"habitation","Pharaoh":"Pharaoh","inactivity":"inactivity","greens":"greens","carrier":"carrier","daddy":"daddy","clutches":"clutches","narration":"narration","ounce":"ounce","bullion":"bullion","delegation":"delegation","matron":"matron","standstill":"standstill"};
try{
var body = doc.getFirstFieldValue("body_t"); // get the text corpus. Alter this to where your body field is mapped.
logger.info(" *** 1 *** ");
var keywords = getMatches(body); // check the text corpus for matches
// make sure we have an array
if(!keywords || !keywords.add){
keywords = [];
}
var bodyArray = body.split(" "); // split the corpus into an array
logger.info(" *** body arrY *** " +bodyArray.length);
for(var i=0; i<bodyArray.length; i++){
var word = new String(bodyArray[i]);
if(dictiopnary[word]){ // if word is in the dictionary, add it to the keywords.
logger.info("**** add WORd " + bodyArray[i]);
keywords.push(bodyArray[i]);
}
}
doc.setField("keywords", keywords)// field should be a multi-valued string field.
logger.info("KEYWORDS **** " + JSON.stringify(keywords));
}catch(e){
logger.error("***** ERROR ***** ");
logger.error(e.toString());
}
/* Examples. These are patterns, not specific phrases that will be matched.
iPhone 12 :: /[a-zA-Z]+\s[0-9]\d/g
iPhone XR :: /[i][P][a-zA-Z]+\s[A-Z]{1,2}/g
256GB / 128gb :: /[0-9]{0,5}\d[a-zA-Z]+\s/g
4G/5G :: /[0-9][A-Z]/g /[0-9][g|G|gb|GB]/g
6.8 :: /[0-9].[0-9]/g
Nokia / Prepaid :: /(?:[A-Z]{1,1}[a-z]{4,20})/g
PureView :: /([A-Z][a-z]+)(?:([A-Z]{1})([a-z]+))+/gm
IP6X / 5G :: /[A-Z]{0,2}[0-9][A-Z]{1,2}/g
6.1-inch :: /([0-9].[0-9].[a-z]{1,5})/g
iPhone 12 / iPhone XR /[i][P][a-zA-Z]+\s[A-Z|0-9]{1,2}/g
IP6X :: /[A-Z]\w[0-9][A-Z]/g
*/
function getMatches(str){
var regarr = [
/[a-zA-Z]+\s[0-9]\d/g,
/[i][P][a-zA-Z]+\s[A-Z]{1,2}/g,
/[0-9]{0,5}\d[a-zA-Z]+\s/g,
/[0-9][A-Z]/g ,
/[0-9][g|G|gb|GB]/g,
/[0-9].[0-9]/g,
/(?:[A-Z]{1,1}[a-z]{4,20})/g,
/([A-Z][a-z]+)(?:([A-Z]{1})([a-z]+))+/gm,
/([0-9].[0-9].[a-z]{1,5})/g,
/[i][P][a-zA-Z]+\s[A-Z|0-9]{1,2}/g,
/[A-Z]\w[0-9][A-Z]/g
];
var result = new Array();
for(var i=0; i<regarr.length; i++){
var items = str.match(regarr[i]);
result.concat(items);
}
return result
}
return doc;
}