|
4 | 4 | import logging |
5 | 5 | import os |
6 | 6 | import pickle |
| 7 | +import re |
7 | 8 | import subprocess |
8 | 9 | import sys |
9 | 10 | from collections import namedtuple |
@@ -36,28 +37,21 @@ def get_first_sentence(cls, text): |
36 | 37 | The first sentence found in the text, or the entire text if no sentence |
37 | 38 | boundary is found. |
38 | 39 | """ |
39 | | - # Split the text into lines |
40 | | - lines = text.splitlines() |
| 40 | + if not text: |
| 41 | + return "" |
41 | 42 |
|
42 | | - # Trim leading and trailing whitespace from each line and ignore completely blank lines |
43 | | - lines = [line.strip() for line in lines] |
| 43 | + text = text.replace('\n', ' ') |
| 44 | + # Split by double spaces to get paragraphs |
| 45 | + paragraphs = text.split(' ') |
| 46 | + first_paragraph = paragraphs[0].strip() |
44 | 47 |
|
45 | | - if not lines: |
46 | | - return "" |
| 48 | + # Look for a period followed by a space in the first paragraph |
| 49 | + period_match = re.search(r'(.*?)\.(?:\s|$)', first_paragraph) |
| 50 | + if period_match: |
| 51 | + return period_match.group(1).strip() |
47 | 52 |
|
48 | | - # Case 1: Single line followed by blank line(s) or end of text |
49 | | - if len(lines) == 1 or (len(lines) > 1 and lines[1] == ""): |
50 | | - first_line = lines[0] |
51 | | - # Check for the first period |
52 | | - period_index = first_line.find(".") |
53 | | - # If there's a period, return up to the period; otherwise, return the full line |
54 | | - return first_line[: period_index + 1] if period_index != -1 else first_line |
55 | | - |
56 | | - # Case 2: Multiple contiguous lines, treat as a block |
57 | | - block = " ".join(lines) |
58 | | - period_index = block.find(".") |
59 | | - # If there's a period, return up to the period; otherwise, return the full block |
60 | | - return block[: period_index + 1] if period_index != -1 else block |
| 53 | + # If no period in the first paragraph, return the entire first paragraph |
| 54 | + return first_paragraph |
61 | 55 |
|
62 | 56 | @classmethod |
63 | 57 | def get_cached_description(cls, node): |
|
0 commit comments