@@ -75,16 +75,30 @@ struct TextSegments
7575 seg.size = text.size ();
7676 allText += text;
7777 segments.insert_or_assign (seg.begin , seg);
78-
7978 }
80- else if (nodeType == NodeType::ELEMENT_NODE &&
81- wcscmp (nodeName, L" INPUT" ) == 0 )
79+ else if (nodeType == NodeType::ELEMENT_NODE)
8280 {
83- const wchar_t * type = domutils::getAttribute (nodeTree, L" type" );
84- if (!type || wcscmp (type, L" hidden" ) != 0 )
81+ if (wcscmp (nodeName, L" INPUT" ) == 0 )
82+ {
83+ const wchar_t * type = domutils::getAttribute (nodeTree, L" type" );
84+ if (!type || wcscmp (type, L" hidden" ) != 0 )
85+ {
86+ const wchar_t * value = domutils::getAttribute (nodeTree, L" value" );
87+ std::wstring text = value ? value : L" " ;
88+ TextSegment seg{};
89+ seg.nodeId = nodeTree[L" nodeId" ].GetInt ();
90+ seg.nodeType = nodeType;
91+ seg.begin = allText.size ();
92+ seg.size = text.size ();
93+ allText += text;
94+ segments.insert_or_assign (seg.begin , seg);
95+ }
96+ }
97+ else if (wcscmp (nodeName, L" TEXTAREA" ) == 0 )
8598 {
86- const wchar_t * value = domutils::getAttribute (nodeTree, L" value" );
87- std::wstring text = value ? value : L" " ;
99+ std::wstring text;
100+ for (const auto & child : nodeTree[L" children" ].GetArray ())
101+ text += child[L" nodeValue" ].GetString ();
88102 TextSegment seg{};
89103 seg.nodeId = nodeTree[L" nodeId" ].GetInt ();
90104 seg.nodeType = nodeType;
@@ -100,7 +114,8 @@ struct TextSegments
100114 wcscmp (nodeName, L" NOSCRIPT" ) != 0 &&
101115 wcscmp (nodeName, L" NOFRAMES" ) != 0 &&
102116 wcscmp (nodeName, L" STYLE" ) != 0 &&
103- wcscmp (nodeName, L" TITLE" ) != 0 )
117+ wcscmp (nodeName, L" TITLE" ) != 0 &&
118+ wcscmp (nodeName, L" TEXTAREA" ) != 0 )
104119 {
105120 for (const auto & child : nodeTree[L" children" ].GetArray ())
106121 {
0 commit comments