Skip to content

Commit 0909202

Browse files
authored
Merge pull request #63 from mindsdb/staging
Release 0.0.19
2 parents 8d2047f + fef7cd9 commit 0909202

File tree

6 files changed, with 184 additions and 167 deletions.

.github/workflows/docs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ on:
88
jobs:
99
doc_build:
1010
runs-on: ubuntu-latest
11+
permissions:
12+
contents: write
1113

1214
steps:
1315
- name: checkout and set up

poetry.lock

Lines changed: 168 additions & 161 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "type_infer"
3-
version = "0.0.18"
3+
version = "0.0.19"
44
description = "Automated type inference for Machine Learning pipelines."
55
authors = ["MindsDB Inc. <hello@mindsdb.com>"]
66
license = "GPL-3.0"
@@ -19,7 +19,7 @@ psutil = "^5.9.0"
1919
toml = "^0.10.2"
2020

2121
# rule based deps, part of core
22-
langid = "^1.1.6"
22+
py3langid = ">=0.2.2,<0.3"
2323
nltk = "^3"
2424

2525
[build-system]

tests/unit_tests/test_helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import unittest
22

33
from type_infer import helpers
4+
from type_infer.rule_based.helpers import get_language_dist
45

56

67
class TestCastStringToPythonType(unittest.TestCase):
@@ -32,3 +33,10 @@ def test_nan_is_numeric(self):
3233
def test_inf_is_numeric(self):
3334
self.assertTrue(helpers.is_nan_numeric('inf'))
3435
self.assertTrue(helpers.is_nan_numeric(float('inf')))
36+
37+
38+
class TestLangid(unittest.TestCase):
39+
def test_get_language_dist(self):
40+
dist = get_language_dist(['This is clearly English', 'Y esto es claramente español'])
41+
self.assertTrue('en' in dist and dist['en'] == 1)
42+
self.assertTrue('es' in dist and dist['es'] == 1)

type_infer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from type_infer import api
44
from type_infer import helpers
55

6-
__version__ = '0.0.18'
6+
__version__ = '0.0.19'
77

88

99
__all__ = [

type_infer/rule_based/helpers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
import numpy as np
88
import scipy.stats as st
9-
from langid.langid import LanguageIdentifier
10-
from langid.langid import model as langid_model
9+
from py3langid.langid import LanguageIdentifier
10+
from py3langid.langid import MODEL_FILE as LANGID_MODEL_FILE
1111

1212
from type_infer.dtype import dtype
1313

@@ -109,7 +109,7 @@ def get_language_dist(data):
109109
lang_dist = defaultdict(lambda: 0)
110110
lang_dist['Unknown'] = 0
111111
lang_probs_cache = dict()
112-
identifier = LanguageIdentifier.from_modelstring(langid_model, norm_probs=True)
112+
identifier = LanguageIdentifier.from_pickled_model(LANGID_MODEL_FILE, norm_probs=True)
113113
for text in data:
114114
text = str(text)
115115
text = text.translate(str.maketrans('', '', string.punctuation))

0 commit comments

Comments
 (0)