1414 * 2) I/O functions were not available for all types
1515 * in the get_datum_text_by_oid() function.
1616 *
17- * 3) SIGSEGV in case of bytea output as additional information.
17+ * 3) The output of lexeme positions in the high keys of the posting
18+ * tree is not supported.
1819 */
1920
2021#include "postgres.h"
22+ #include "miscadmin.h"
2123#include "fmgr.h"
2224#include "funcapi.h"
2325#include "catalog/namespace.h"
@@ -115,8 +117,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115117static Datum category_get_datum_text (RumNullCategory category );
116118static Oid find_add_info_oid (RumState * rum_state_ptr );
117119static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118-
119120static Datum get_positions_to_text_datum (Datum add_info );
121+ static char pos_get_weight (WordEntryPos position );
120122
121123/*
122124 * The rum_metapage_info() function is used to retrieve
@@ -472,7 +474,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472474 */
473475 if (fctx -> call_cntr <= inter_call_data -> maxoff )
474476 {
475- RumItem * high_key_ptr ;
477+ RumItem * high_key_ptr ; /* to read high key from a page */
476478 RumItem * rum_item_ptr ; /* to read data from a page */
477479 Datum values [4 ]; /* return values */
478480 bool nulls [4 ]; /* true if the corresponding value is NULL */
@@ -497,7 +499,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497499 values [2 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
498500
499501 /* Returning add info */
500- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
502+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
501503 && inter_call_data -> add_info_oid != BYTEAOID )
502504 {
503505 values [3 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +508,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506508
507509 /*
508510 * In this case, we are dealing with the positions
509- * of tokens and they need to be decoded.
511+ * of lexemes and they need to be decoded.
510512 */
511- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
513+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
512514 && inter_call_data -> add_info_oid == BYTEAOID )
513515 {
514- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515516 values [3 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
516517 }
517518
@@ -525,26 +526,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525526 SRF_RETURN_NEXT (fctx , result );
526527 }
527528
528- /*
529- * Reading information from the page in rum_item.
530- *
531- * TODO: The fact is that being on the posting tree page, we don't know which
532- * index attribute this posting tree was built for, so we don't know the
533- * attribute number of the additional information. But the rumDataPageLeafRead()
534- * function requires it to read information from the page. Here we use the auxiliary
535- * function find_add_info_atr_num(), which simply iterates through the array with
536- * attributes that are additional information and selects the attribute number for
537- * which the additional information attribute is not NULL. This approach is incorrect
538- * because there may not be additional information for the attribute on the page,
539- * but we hope that in this case add_info_is_null will have the value true and the
540- * additional information will not be read.
541- *
542- * This problem can be solved by asking the user for the attribute number of
543- * additional information, because going through the index from top to bottom,
544- * he saw it next to the link to the posting tree root.
545- */
529+ /* Reading information from the page in rum_item */
546530 inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547- /* inter_call_data->cur_tuple_key_attnum, */
548531 find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549532 rum_item_ptr , false, inter_call_data -> rum_state_ptr );
550533
@@ -554,7 +537,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554537 values [2 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555538
556539 /* Returning add info */
557- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
540+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
558541 && inter_call_data -> add_info_oid != BYTEAOID )
559542 {
560543 values [3 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +546,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563546
564547 /*
565548 * In this case, we are dealing with the positions
566- * of tokens and they need to be decoded.
549+ * of lexemes and they need to be decoded.
567550 */
568- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
551+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
569552 && inter_call_data -> add_info_oid == BYTEAOID )
570553 {
571554 values [3 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +712,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729712 */
730713 if (fctx -> call_cntr <= inter_call_data -> maxoff )
731714 {
732- RumItem * high_key_ptr ;
715+ RumItem * high_key_ptr ; /* to read high key from a page */
733716 PostingItem * posting_item_ptr ; /* to read data from a page */
734717 Datum values [5 ]; /* returned values */
735718 bool nulls [5 ]; /* true if the corresponding returned value is NULL */
@@ -754,7 +737,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754737 values [3 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
755738
756739 /* Returning add info */
757- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
740+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
758741 && inter_call_data -> add_info_oid != BYTEAOID )
759742 {
760743 values [4 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +746,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763746
764747 /*
765748 * In this case, we are dealing with the positions
766- * of tokens and they need to be decoded.
749+ * of lexemes and they need to be decoded.
767750 */
768- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
751+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
769752 && inter_call_data -> add_info_oid == BYTEAOID )
770753 {
771- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772754 values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
773755 }
774756
@@ -793,7 +775,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793775 values [3 ] = BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794776
795777 /* Returning add info */
796- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
778+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797779 && inter_call_data -> add_info_oid != BYTEAOID )
798780 {
799781 values [4 ] = get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +784,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802784
803785 /*
804786 * In this case, we are dealing with the positions
805- * of tokens and they need to be decoded.
787+ * of lexemes and they need to be decoded.
806788 */
807- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
789+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808790 && inter_call_data -> add_info_oid == BYTEAOID )
809791 {
810- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811792 values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
812793 }
813794
@@ -1072,17 +1053,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721053 values [4 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
10731054
10741055 /* Returning add info */
1075- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0 &&
1056+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid &&
10761057 inter_call_data -> add_info_oid != BYTEAOID )
10771058 {
10781059 values [5 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo , inter_call_data -> add_info_oid );
10791060 }
10801061
10811062 /*
10821063 * In this case, we are dealing with the positions
1083- * of tokens and they need to be decoded.
1064+ * of lexemes and they need to be decoded.
10841065 */
1085- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
1066+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
10861067 && inter_call_data -> add_info_oid == BYTEAOID )
10871068 {
10881069 values [5 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1408,16 @@ get_page_from_raw(bytea *raw_page)
14271408 * int2, int4, int8, float4, float8, money, oid, timestamp,
14281409 * timestamptz, time, timetz, date, interval, macaddr, inet,
14291410 * cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430- *
1431- * TODO: All types accepted by rum must be checked, but
1432- * perhaps some types are missing or some are superfluous.
14331411 */
14341412static Datum
14351413get_datum_text_by_oid (Datum info , Oid info_oid )
14361414{
14371415 char * str_info = NULL ;
14381416
1439- /* info cannot be NULL */
1440- Assert (DatumGetPointer (info ) != NULL );
1441-
14421417 /*
14431418 * Form a string depending on the type of info.
14441419 *
1445- * FIXME : The macros used below are taken from the
1420+ * TODO : The macros used below are taken from the
14461421 * pg_type_d file.h, and it says not to use them
14471422 * in the new code.
14481423 */
@@ -1528,18 +1503,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281503 str_info = OidOutputFunctionCall (F_CHAROUT , info );
15291504 break ;
15301505
1531- /*
1532- * TODO: For some reason, the rum index created for a single tsv
1533- * field contains additional information as bytea. In addition,
1534- * if additional information in this format is extracted from
1535- * posting tree pages, it cannot be displayed correctly as text.
1536- * If the additional information was extracted from the entry
1537- * tree pages, then it is displayed correctly.
1538- */
15391506 case BYTEAOID :
1540- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541- /* break; */
1542- return CStringGetTextDatum ("BYTEAOID is not supported" );
1507+ str_info = OidOutputFunctionCall (F_BYTEAOUT , info );
1508+ break ;
15431509
15441510 case BITOID :
15451511 str_info = OidOutputFunctionCall (F_BIT_OUT , info );
@@ -1634,14 +1600,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341600 * the Oid of additional information for an attribute for
16351601 * which it is not NULL.
16361602 *
1637- * TODO: The logic of the function assumes that there cannot
1603+ * The logic of the function assumes that there cannot
16381604 * be several types of additional information in the index,
16391605 * otherwise it will not work.
16401606 */
16411607static Oid
16421608find_add_info_oid (RumState * rum_state_ptr )
16431609{
1644- Oid add_info_oid = 0 ;
1610+ Oid add_info_oid = InvalidOid ;
16451611
16461612 /* Number of index attributes */
16471613 int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1617,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511617 * oid of additional information.
16521618 */
16531619 for (int i = 0 ; i < num_attrs ; i ++ )
1620+ {
16541621 if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1622+ {
1623+ Assert (add_info_oid == InvalidOid );
16551624 add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1625+ }
1626+ }
16561627
16571628 return add_info_oid ;
16581629}
@@ -1661,19 +1632,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611632 * This is an auxiliary function to get the attribute number
16621633 * for additional information. It is used in the rum_leaf_data_page_items()
16631634 * function to call the rumDataPageLeafRead() function.
1635+ *
1636+ * The logic of the function assumes that there cannot
1637+ * be several types of additional information in the index,
1638+ * otherwise it will not work.
16641639 */
16651640static OffsetNumber
16661641find_add_info_atrr_num (RumState * rum_state_ptr )
16671642{
1668- OffsetNumber add_info_attr_num = 0 ;
1643+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
16691644
16701645 /* Number of index attributes */
16711646 int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
16721647
16731648 /* Go through the addAttrs array */
1674- for (int i = 0 ; i < num_attrs ; i ++ )
1649+ for (int i = 0 ; i < num_attrs ; i ++ )
1650+ {
16751651 if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1652+ {
1653+ Assert (add_info_attr_num == InvalidOffsetNumber );
16761654 add_info_attr_num = i ;
1655+ }
1656+ }
16771657
16781658 /* Need to add 1 because the attributes are numbered from 1 */
16791659 return add_info_attr_num + 1 ;
@@ -1683,8 +1663,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831663#define POS_MAX_VAL_LENGHT 6
16841664
16851665/*
1686- * A function for extracting the positions of tokens from additional
1687- * information. Returns a string in which the positions of the tokens
1666+ * A function for extracting the positions of lexemes from additional
1667+ * information. Returns a string in which the positions of the lexemes
16881668 * are recorded. The memory that the string occupies must be cleared later.
16891669 */
16901670static Datum
@@ -1711,14 +1691,17 @@ get_positions_to_text_datum(Datum add_info)
17111691 cur_max_str_lenght = POS_STR_BUF_LENGHT ;
17121692 positions_str_cur_ptr = positions_str ;
17131693
1714- /* Extract the positions of the tokens and put them in the string */
1694+ /* Extract the positions of the lexemes and put them in the string */
17151695 for (int i = 0 ; i < npos ; i ++ )
17161696 {
17171697 /* At each iteration decode the position */
17181698 ptrt = decompress_pos (ptrt , & position );
17191699
1720- /* Write this position in the string */
1721- sprintf (positions_str_cur_ptr , "%d," , position );
1700+ /* Write this position and weight in the string */
1701+ if (pos_get_weight (position ) == 'D' )
1702+ sprintf (positions_str_cur_ptr , "%d," , WEP_GETPOS (position ));
1703+ else
1704+ sprintf (positions_str_cur_ptr , "%d%c," , WEP_GETPOS (position ), pos_get_weight (position ));
17221705
17231706 /* Moving the pointer forward */
17241707 positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1727,25 @@ get_positions_to_text_datum(Datum add_info)
17441727 pfree (positions_str );
17451728 return res ;
17461729}
1730+
1731+ /*
1732+ * The function extracts the weight and
1733+ * returns the corresponding letter.
1734+ */
1735+ static char
1736+ pos_get_weight (WordEntryPos position )
1737+ {
1738+ char res = 'D' ;
1739+
1740+ switch (WEP_GETWEIGHT (position ))
1741+ {
1742+ case 3 :
1743+ return 'A' ;
1744+ case 2 :
1745+ return 'B' ;
1746+ case 1 :
1747+ return 'C' ;
1748+ }
1749+
1750+ return res ;
1751+ }
0 commit comments